From d8229a55316183b523c61f28f88d64e6c8f3a96a Mon Sep 17 00:00:00 2001 From: madmaxoft Date: Fri, 4 Oct 2013 09:30:15 +0200 Subject: Added cEnvelopeParser and cMultipartParser. --- source/HTTPServer/EnvelopeParser.cpp | 132 ++++++++++++++++++ source/HTTPServer/EnvelopeParser.h | 69 +++++++++ source/HTTPServer/MultipartParser.cpp | 255 ++++++++++++++++++++++++++++++++++ source/HTTPServer/MultipartParser.h | 76 ++++++++++ 4 files changed, 532 insertions(+) create mode 100644 source/HTTPServer/EnvelopeParser.cpp create mode 100644 source/HTTPServer/EnvelopeParser.h create mode 100644 source/HTTPServer/MultipartParser.cpp create mode 100644 source/HTTPServer/MultipartParser.h diff --git a/source/HTTPServer/EnvelopeParser.cpp b/source/HTTPServer/EnvelopeParser.cpp new file mode 100644 index 000000000..8dbe05f14 --- /dev/null +++ b/source/HTTPServer/EnvelopeParser.cpp @@ -0,0 +1,132 @@ + +// EnvelopeParser.cpp + +// Implements the cEnvelopeParser class representing a parser for RFC-822 envelope headers, used both in HTTP and in MIME + +#include "Globals.h" +#include "EnvelopeParser.h" + + + + + +cEnvelopeParser::cEnvelopeParser(cCallbacks & a_Callbacks) : + m_Callbacks(a_Callbacks), + m_IsInHeaders(true) +{ +} + + + + + +int cEnvelopeParser::Parse(const char * a_Data, int a_Size) +{ + if (!m_IsInHeaders) + { + return 0; + } + + // Start searching 1 char from the end of the already received data, if available: + size_t SearchStart = m_IncomingData.size(); + SearchStart = (SearchStart > 1) ? SearchStart - 1 : 0; + + m_IncomingData.append(a_Data, a_Size); + + size_t idxCRLF = m_IncomingData.find("\r\n", SearchStart); + if (idxCRLF == AString::npos) + { + // Not a complete line yet, all input consumed: + return a_Size; + } + + // Parse as many lines as found: + size_t Last = 0; + do + { + if (idxCRLF == Last) + { + // This was the last line of the data. Finish whatever value has been cached and return: + NotifyLast(); + m_IsInHeaders = false; + return a_Size - (m_IncomingData.size() - idxCRLF) + 2; + } + if (!ParseLine(m_IncomingData.c_str() + Last, idxCRLF - Last)) + { + // An error has occurred + m_IsInHeaders = false; + return -1; + } + Last = idxCRLF + 2; + idxCRLF = m_IncomingData.find("\r\n", idxCRLF + 2); + } while (idxCRLF != AString::npos); + m_IncomingData.erase(0, Last); + + // Parsed all lines and still expecting more + return a_Size; +} + + + + + +void cEnvelopeParser::Reset(void) +{ + m_IsInHeaders = true; + m_IncomingData.clear(); + m_LastKey.clear(); + m_LastValue.clear(); +} + + + + + +void cEnvelopeParser::NotifyLast(void) +{ + if (!m_LastKey.empty()) + { + m_Callbacks.OnHeaderLine(m_LastKey, m_LastValue); + m_LastKey.clear(); + } + m_LastValue.clear(); +} + + + + + +bool cEnvelopeParser::ParseLine(const char * a_Data, size_t a_Size) +{ + ASSERT(a_Size > 0); + if (a_Data[0] <= ' ') + { + // This line is a continuation for the previous line + if (m_LastKey.empty()) + { + return false; + } + // Append, including the whitespace in a_Data[0] + m_LastValue.append(a_Data, a_Size); + return true; + } + + // This is a line with a new key: + NotifyLast(); + for (size_t i = 0; i < a_Size; i++) + { + if (a_Data[i] == ':') + { + m_LastKey.assign(a_Data, i); + m_LastValue.assign(a_Data + i + 2, a_Size - i - 2); + return true; + } + } // for i - a_Data[] + + // No colon was found, key-less header?? + return false; +} + + + + diff --git a/source/HTTPServer/EnvelopeParser.h b/source/HTTPServer/EnvelopeParser.h new file mode 100644 index 000000000..6430fbebf --- /dev/null +++ b/source/HTTPServer/EnvelopeParser.h @@ -0,0 +1,69 @@ + +// EnvelopeParser.h + +// Declares the cEnvelopeParser class representing a parser for RFC-822 envelope headers, used both in HTTP and in MIME + + + + + +#pragma once + + + + + +class cEnvelopeParser +{ +public: + class cCallbacks + { + public: + /// Called when a full header line is parsed + virtual void OnHeaderLine(const AString & a_Key, const AString & a_Value) = 0; + } ; + + + cEnvelopeParser(cCallbacks & a_Callbacks); + + /** Parses the incoming data. + Returns the number of bytes consumed from the input. The bytes not consumed are not part of the envelope header + */ + int Parse(const char * a_Data, int a_Size); + + /// Makes the parser forget everything parsed so far, so that it can be reused for parsing another datastream + void Reset(void); + + /// Returns true if more input is expected for the envelope header + bool IsInHeaders(void) const { return m_IsInHeaders; } + + /// Sets the IsInHeaders flag; used by cMultipartParser to simplify the parser initial conditions + void SetIsInHeaders(bool a_IsInHeaders) { m_IsInHeaders = a_IsInHeaders; } + +public: + /// Callbacks to call for the various events + cCallbacks & m_Callbacks; + + /// Set to true while the parser is still parsing the envelope headers. Once set to true, the parser will not consume any more data. + bool m_IsInHeaders; + + /// Buffer for the incoming data until it is parsed + AString m_IncomingData; + + /// Holds the last parsed key; used for line-wrapped values + AString m_LastKey; + + /// Holds the last parsed value; used for line-wrapped values + AString m_LastValue; + + + /// Notifies the callback of the key/value stored in m_LastKey/m_LastValue, then erases them + void NotifyLast(void); + + /// Parses one line of header data. Returns true if successful + bool ParseLine(const char * a_Data, size_t a_Size); +} ; + + + + diff --git a/source/HTTPServer/MultipartParser.cpp b/source/HTTPServer/MultipartParser.cpp new file mode 100644 index 000000000..7df151dc3 --- /dev/null +++ b/source/HTTPServer/MultipartParser.cpp @@ -0,0 +1,255 @@ + +// MultipartParser.cpp + +// Implements the cMultipartParser class that parses messages in "multipart/*" encoding into the separate parts + +#include "Globals.h" +#include "MultipartParser.h" +#include "NameValueParser.h" + + + + + +// Disable MSVC warnings: +#if defined(_MSC_VER) + #pragma warning(push) + #pragma warning(disable:4355) // 'this' : used in base member initializer list +#endif + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// self-test: + +#if 0 + +class cMultipartParserTest : + public cMultipartParser::cCallbacks +{ +public: + cMultipartParserTest(void) + { + cMultipartParser Parser("multipart/mixed; boundary=\"MyBoundaryString\"; foo=bar", *this); + const char Data[] = +"ThisIsIgnoredPrologue\r\n\ +--MyBoundaryString\r\n\ +\r\n\ +Body with confusing strings\r\n\ +--NotABoundary\r\n\ +--MyBoundaryStringWithPostfix\r\n\ +--\r\n\ +--MyBoundaryString\r\n\ +content-disposition: inline\r\n\ +\r\n\ +This is body\r\n\ +--MyBoundaryString\r\n\ +\r\n\ +Headerless body with trailing CRLF\r\n\ +\r\n\ +--MyBoundaryString--\r\n\ +ThisIsIgnoredEpilogue"; + printf("Multipart parsing test commencing.\n"); + Parser.Parse(Data, sizeof(Data) - 1); + // DEBUG: Check if the onscreen output corresponds with the data above + printf("Multipart parsing test finished\n"); + } + + virtual void OnPartStart(void) override + { + printf("Starting a new part\n"); + } + + + virtual void OnPartHeader(const AString & a_Key, const AString & a_Value) override + { + printf(" Hdr: \"%s\"=\"%s\"\n", a_Key.c_str(), a_Value.c_str()); + } + + + virtual void OnPartData(const char * a_Data, int a_Size) override + { + printf(" Data: %d bytes, \"%.*s\"\n", a_Size, a_Size, a_Data); + } + + + virtual void OnPartEnd(void) override + { + printf("Part end\n"); + } +} g_Test; + +#endif + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// cMultipartParser: + + +cMultipartParser::cMultipartParser(const AString & a_ContentType, cCallbacks & a_Callbacks) : + m_Callbacks(a_Callbacks), + m_IsValid(true), + m_EnvelopeParser(*this), + m_HasHadData(false) +{ + static AString s_Multipart = "multipart/"; + + // Check that the content type is multipart: + AString ContentType(a_ContentType); + if (strncmp(ContentType.c_str(), "multipart/", 10) != 0) + { + m_IsValid = false; + return; + } + size_t idxSC = ContentType.find(';', 10); + if (idxSC == AString::npos) + { + m_IsValid = false; + return; + } + + // Find the multipart boundary: + ContentType.erase(0, idxSC + 1); + cNameValueParser CTParser(ContentType.c_str(), ContentType.size()); + if (!CTParser.IsValid()) + { + m_IsValid = false; + return; + } + m_Boundary = CTParser["boundary"]; + m_IsValid = !m_Boundary.empty(); + if (!m_IsValid) + { + return; + } + + // Set the envelope parser for parsing the body, so that our Parse() function parses the ignored prefix data as a body + m_EnvelopeParser.SetIsInHeaders(false); + + // Append an initial CRLF to the incoming data, so that a body starting with the boundary line will get caught + m_IncomingData.assign("\r\n"); + + /* + m_Boundary = AString("\r\n--") + m_Boundary + m_BoundaryEnd = m_Boundary + "--\r\n"; + m_Boundary = m_Boundary + "\r\n"; + */ +} + + + + + +void cMultipartParser::Parse(const char * a_Data, int a_Size) +{ + // Skip parsing if invalid + if (!m_IsValid) + { + return; + } + + // Append to buffer, then parse it: + m_IncomingData.append(a_Data, a_Size); + while (true) + { + if (m_EnvelopeParser.IsInHeaders()) + { + int BytesConsumed = m_EnvelopeParser.Parse(m_IncomingData.data(), m_IncomingData.size()); + if (BytesConsumed < 0) + { + m_IsValid = false; + return; + } + if ((BytesConsumed == a_Size) && m_EnvelopeParser.IsInHeaders()) + { + // All the incoming data has been consumed and still waiting for more + return; + } + m_IncomingData.erase(0, BytesConsumed); + } + + // Search for boundary / boundary end: + size_t idxBoundary = m_IncomingData.find("\r\n--"); + if (idxBoundary == AString::npos) + { + // Boundary string start not present, present as much data to the part callback as possible + if (m_IncomingData.size() > m_Boundary.size() + 8) + { + size_t BytesToReport = m_IncomingData.size() - m_Boundary.size() - 8; + m_Callbacks.OnPartData(m_IncomingData.data(), BytesToReport); + m_IncomingData.erase(0, BytesToReport); + } + return; + } + if (idxBoundary > 0) + { + m_Callbacks.OnPartData(m_IncomingData.data(), idxBoundary); + m_IncomingData.erase(0, idxBoundary); + } + idxBoundary = 4; + size_t LineEnd = m_IncomingData.find("\r\n", idxBoundary); + if (LineEnd == AString::npos) + { + // Not a complete line yet, present as much data to the part callback as possible + if (m_IncomingData.size() > m_Boundary.size() + 8) + { + size_t BytesToReport = m_IncomingData.size() - m_Boundary.size() - 8; + m_Callbacks.OnPartData(m_IncomingData.data(), BytesToReport); + m_IncomingData.erase(0, BytesToReport); + } + return; + } + if ( + (LineEnd - idxBoundary != m_Boundary.size()) && // Line length not equal to boundary + (LineEnd - idxBoundary != m_Boundary.size() + 2) // Line length not equal to boundary end + ) + { + // Got a line, but it's not a boundary, report it as data: + m_Callbacks.OnPartData(m_IncomingData.data(), LineEnd); + m_IncomingData.erase(0, LineEnd); + continue; + } + + if (strncmp(m_IncomingData.c_str() + idxBoundary, m_Boundary.c_str(), m_Boundary.size()) == 0) + { + // Boundary or BoundaryEnd found: + m_Callbacks.OnPartEnd(); + size_t idxSlash = idxBoundary + m_Boundary.size(); + if ((m_IncomingData[idxSlash] == '-') && (m_IncomingData[idxSlash + 1] == '-')) + { + // This was the last part + m_Callbacks.OnPartData(m_IncomingData.data() + idxSlash + 4, m_IncomingData.size() - idxSlash - 4); + m_IncomingData.clear(); + return; + } + m_Callbacks.OnPartStart(); + m_IncomingData.erase(0, LineEnd + 2); + + // Keep parsing for the headers that may have come with this data: + m_EnvelopeParser.Reset(); + continue; + } + + // It's a line, but not a boundary. It can be fully sent to the data receiver, since a boundary cannot cross lines + m_Callbacks.OnPartData(m_IncomingData.c_str(), LineEnd); + m_IncomingData.erase(0, LineEnd); + } // while (true) +} + + + + + +void cMultipartParser::OnHeaderLine(const AString & a_Key, const AString & a_Value) +{ + m_Callbacks.OnPartHeader(a_Key, a_Value); +} + + + + diff --git a/source/HTTPServer/MultipartParser.h b/source/HTTPServer/MultipartParser.h new file mode 100644 index 000000000..d853929ed --- /dev/null +++ b/source/HTTPServer/MultipartParser.h @@ -0,0 +1,76 @@ + +// MultipartParser.h + +// Declares the cMultipartParser class that parses messages in "multipart/*" encoding into the separate parts + + + + + +#pragma once + +#include "EnvelopeParser.h" + + + + + +class cMultipartParser : + protected cEnvelopeParser::cCallbacks +{ +public: + class cCallbacks + { + public: + /// Called when a new part starts + virtual void OnPartStart(void) = 0; + + /// Called when a complete header line is received for a part + virtual void OnPartHeader(const AString & a_Key, const AString & a_Value) = 0; + + /// Called when body for a part is received + virtual void OnPartData(const char * a_Data, int a_Size) = 0; + + /// Called when the current part ends + virtual void OnPartEnd(void) = 0; + } ; + + /// Creates the parser, expects to find the boundary in a_ContentType + cMultipartParser(const AString & a_ContentType, cCallbacks & a_Callbacks); + + /// Parses more incoming data + void Parse(const char * a_Data, int a_Size); + +protected: + /// The callbacks to call for various parsing events + cCallbacks & m_Callbacks; + + /// True if the data parsed so far is valid; if false, further parsing is skipped + bool m_IsValid; + + /// Parser for each part's envelope + cEnvelopeParser m_EnvelopeParser; + + /// Buffer for the incoming data until it is parsed + AString m_IncomingData; + + /// The boundary, excluding both the initial "--" and the terminating CRLF + AString m_Boundary; + + /// Set to true if some data for the current part has already been signalized to m_Callbacks. Used for proper CRLF inserting. + bool m_HasHadData; + + + /// Parse one line of incoming data. The CRLF has already been stripped from a_Data / a_Size + void ParseLine(const char * a_Data, int a_Size); + + /// Parse one line of incoming data in the headers section of a part. The CRLF has already been stripped from a_Data / a_Size + void ParseHeaderLine(const char * a_Data, int a_Size); + + // cEnvelopeParser overrides: + virtual void OnHeaderLine(const AString & a_Key, const AString & a_Value) override; +} ; + + + + -- cgit v1.2.3