From b3b723b4531a3781ab847a2652cf192303e78941 Mon Sep 17 00:00:00 2001 From: Mattes D Date: Sun, 25 Dec 2016 18:29:21 +0100 Subject: Refactored to put URL Encoding / Decoding in a single place. (#3491) --- Server/Plugins/APIDump/APIDesc.lua | 38 +++++ Server/Plugins/APIDump/Classes/WebAdmin.lua | 3 +- src/Bindings/ManualBindings.cpp | 85 ++++++++--- src/HTTP/HTTPFormParser.cpp | 13 +- src/StringUtils.cpp | 220 +++++++++++++++++++++------- src/StringUtils.h | 15 +- src/WebAdmin.cpp | 25 +--- 7 files changed, 295 insertions(+), 104 deletions(-) diff --git a/Server/Plugins/APIDump/APIDesc.lua b/Server/Plugins/APIDump/APIDesc.lua index 3fc21b6a3..d4a814cb9 100644 --- a/Server/Plugins/APIDump/APIDesc.lua +++ b/Server/Plugins/APIDump/APIDesc.lua @@ -13534,6 +13534,44 @@ local CompressedString = cStringCompression.CompressStringGZIP("DataToCompress") }, Notes = "Parses the Authority part of the URL. Parts that are not explicitly specified in the AuthPart are returned empty, the port is returned zero. If parsing fails, the function returns nil and an error message.", }, + UrlDecode = + { + IsStatic = true, + Params = + { + { + Name = "Text", + Type = "string", + }, + }, + Returns = + { + { + Name = "Decoded", + Type = "string", + }, + }, + Notes = "Returns the Text, URL-decoded. Returns nil if there is a problem while decoding (invalid input).", + }, + UrlEncode = + { + IsStatic = true, + Params = + { + { + Name = "Text", + Type = "string", + }, + }, + Returns = + { + { + Name = "Encoded", + Type = "string", + }, + }, + Notes = "Returns the Text, URL-encoded.", + }, }, AdditionalInfo = { diff --git a/Server/Plugins/APIDump/Classes/WebAdmin.lua b/Server/Plugins/APIDump/Classes/WebAdmin.lua index 583278e5f..f6ebe9da7 100644 --- a/Server/Plugins/APIDump/Classes/WebAdmin.lua +++ b/Server/Plugins/APIDump/Classes/WebAdmin.lua @@ -121,6 +121,7 @@ return GetURLEncodedString = { IsStatic = true, + ObsoletedBy = "cUrlParser:UrlEncode", Params = { { @@ -134,7 +135,7 @@ return Type = "string", }, }, - Notes = "Returns the string given to it escaped by URL encoding, which makes the string suitable for transmission in an URL. Invalid characters are turned into \"%xy\" values.", + Notes = "OBSOLETE - use {{cUrlParser}}:UrlEncode() instead.
Returns the string given to it escaped by URL encoding, which makes the string suitable for transmission in an URL. Invalid characters are turned into \"%xy\" values.", }, Reload = { diff --git a/src/Bindings/ManualBindings.cpp b/src/Bindings/ManualBindings.cpp index 1fe3c0f9f..34357eb65 100644 --- a/src/Bindings/ManualBindings.cpp +++ b/src/Bindings/ManualBindings.cpp @@ -2112,6 +2112,66 @@ static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState) +static int tolua_cUrlParser_UrlDecode(lua_State * tolua_S) +{ + // Check the param types: + cLuaState S(tolua_S); + if ( + // Don't care about the first param + !S.CheckParamString(2) || + !S.CheckParamEnd(3) + ) + { + return 0; + } + + // Get the parameters: + AString Input; + S.GetStackValue(2, Input); + + // Convert and return: + auto res = URLDecode(Input); + if (res.first) + { + S.Push(res.second); + } + else + { + S.Push(cLuaState::Nil); + } + return 1; +} + + + + + +static int tolua_cUrlParser_UrlEncode(lua_State * tolua_S) +{ + // Check the param types: + cLuaState S(tolua_S); + if ( + // Don't care about the first param + !S.CheckParamString(2) || + !S.CheckParamEnd(3) + ) + { + return 0; + } + + // Get the parameters: + AString Input; + S.GetStackValue(2, Input); + + // Convert and return: + S.Push(URLEncode(Input)); + return 1; +} + + + + + static int tolua_cWebAdmin_AddWebTab(lua_State * tolua_S) { // Function signatures: @@ -2324,28 +2384,15 @@ static int tolua_cWebAdmin_GetPage(lua_State * tolua_S) -/** Binding for cWebAdmin::GetURLEncodedString. -Manual code required because ToLua generates an extra return value */ +/** Binding for cWebAdmin::GetURLEncodedString. */ static int tolua_cWebAdmin_GetURLEncodedString(lua_State * tolua_S) { - // Check the param types: + // Emit the obsoletion warning: cLuaState S(tolua_S); - if ( - // Don't care whether the first param is a cWebAdmin instance or class - !S.CheckParamString(2) || - !S.CheckParamEnd(3) - ) - { - return 0; - } - - // Get the parameters: - AString Input; - S.GetStackValue(2, Input); + LOGWARNING("cWebAdmin:GetURLEncodedString() is obsolete, use cUrlParser:UrlEncode() instead."); + S.LogStackTrace(); - // Convert and return: - S.Push(cWebAdmin::GetURLEncodedString(Input)); - return 1; + return tolua_cUrlParser_UrlEncode(tolua_S); } @@ -4042,6 +4089,8 @@ void cManualBindings::Bind(lua_State * tolua_S) tolua_function(tolua_S, "IsKnownScheme", tolua_cUrlParser_IsKnownScheme); tolua_function(tolua_S, "Parse", tolua_cUrlParser_Parse); tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart); + tolua_function(tolua_S, "UrlDecode", tolua_cUrlParser_UrlDecode); + tolua_function(tolua_S, "UrlEncode", tolua_cUrlParser_UrlEncode); tolua_endmodule(tolua_S); tolua_beginmodule(tolua_S, "cWebAdmin"); diff --git a/src/HTTP/HTTPFormParser.cpp b/src/HTTP/HTTPFormParser.cpp index ea5da3c18..6ad5d2592 100644 --- a/src/HTTP/HTTPFormParser.cpp +++ b/src/HTTP/HTTPFormParser.cpp @@ -167,13 +167,22 @@ void cHTTPFormParser::ParseFormUrlEncoded(void) case 1: { // Only name present - (*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = ""; + auto name = URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' ')); + if (name.first) + { + (*this)[name.second] = ""; + } break; } case 2: { // name=value format: - (*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = URLDecode(ReplaceAllCharOccurrences(Components[1], '+', ' ')); + auto name = URLDecode(Components[0]); + auto value = URLDecode(Components[1]); + if (name.first && value.first) + { + (*this)[name.second] = value.second; + } break; } } diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp index 81999a35d..2fb4f6a9a 100644 --- a/src/StringUtils.cpp +++ b/src/StringUtils.cpp @@ -14,6 +14,42 @@ +/** Returns the value of the single hex digit. +Returns 0xff on failure. */ +static unsigned char HexToDec(char a_HexChar) +{ + switch (a_HexChar) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': return 10; + case 'b': return 11; + case 'c': return 12; + case 'd': return 13; + case 'e': return 14; + case 'f': return 15; + case 'A': return 10; + case 'B': return 11; + case 'C': return 12; + case 'D': return 13; + case 'E': return 14; + case 'F': return 15; + } + return 0xff; +} + + + + + AString & AppendVPrintf(AString & str, const char * format, va_list args) { ASSERT(format != nullptr); @@ -353,44 +389,63 @@ AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UT a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size for (size_t i = 0; i < a_NumShorts; i++) { - int c = GetBEShort(&a_RawData[i * 2]); - if (c < 0x80) - { - a_UTF8.push_back(static_cast(c)); - } - else if (c < 0x800) - { - a_UTF8.push_back(static_cast(192 + c / 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c - 0xd800 < 0x800) - { - // Error, silently drop - } - else if (c < 0x10000) + a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2]))); + } + return a_UTF8; +} + + + + +AString UnicodeCharToUtf8(unsigned a_UnicodeChar) +{ + if (a_UnicodeChar < 0x80) + { + return AString{static_cast(a_UnicodeChar)}; + } + else if (a_UnicodeChar < 0x800) + { + return AString { - a_UTF8.push_back(static_cast(224 + c / 4096)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c < 0x110000) + static_cast(192 + a_UnicodeChar / 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else if (a_UnicodeChar - 0xd800 < 0x800) + { + // Error + return AString(); + } + else if (a_UnicodeChar < 0x10000) + { + return AString { - a_UTF8.push_back(static_cast(240 + c / 262144)); - a_UTF8.push_back(static_cast(128 + (c / 4096) % 64)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else + static_cast(224 + a_UnicodeChar / 4096), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64) + }; + } + else if (a_UnicodeChar < 0x110000) + { + return AString { - // Error, silently drop - } + static_cast(240 + a_UnicodeChar / 262144), + static_cast(128 + (a_UnicodeChar / 4096) % 64), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else + { + // Error + return AString(); } - return a_UTF8; } + // UTF-8 conversion code adapted from: // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c @@ -708,58 +763,99 @@ AString StripColorCodes(const AString & a_Message) -AString URLDecode(const AString & a_String) +std::pair URLDecode(const AString & a_Text) { AString res; - size_t len = a_String.length(); + auto len = a_Text.size(); res.reserve(len); for (size_t i = 0; i < len; i++) { - char ch = a_String[i]; - if ((ch != '%') || (i > len - 3)) + if (a_Text[i] == '+') { - res.push_back(ch); + res.push_back(' '); continue; } - // Decode the hex value: - char hi = a_String[i + 1], lo = a_String[i + 2]; - if ((hi >= '0') && (hi <= '9')) + if (a_Text[i] != '%') { - hi = hi - '0'; + res.push_back(a_Text[i]); + continue; } - else if ((hi >= 'a') && (hi <= 'f')) + if (i + 1 >= len) { - hi = hi - 'a' + 10; + // String too short for an encoded value + return std::make_pair(false, AString()); } - else if ((hi >= 'A') && (hi <= 'F')) + if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U')) { - hi = hi - 'F' + 10; + // Unicode char "%u0xxxx" + if (i + 6 >= len) + { + return std::make_pair(false, AString()); + } + if (a_Text[i + 2] != '0') + { + return std::make_pair(false, AString()); + } + unsigned v1 = HexToDec(a_Text[i + 3]); + unsigned v2 = HexToDec(a_Text[i + 4]); + unsigned v3 = HexToDec(a_Text[i + 5]); + unsigned v4 = HexToDec(a_Text[i + 6]); + if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)); + i = i + 6; } else { - res.push_back(ch); - continue; - } - if ((lo >= '0') && (lo <= '9')) - { - lo = lo - '0'; + // Regular char "%xx": + if (i + 2 >= len) + { + return std::make_pair(false, AString()); + } + auto v1 = HexToDec(a_Text[i + 1]); + auto v2 = HexToDec(a_Text[i + 2]); + if ((v1 == 0xff) || (v2 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.push_back(static_cast((v1 << 4) | v2)); + i = i + 2; } - else if ((lo >= 'a') && (lo <= 'f')) + } // for i - a_Text[i] + return std::make_pair(true, res); +} + + + + + +AString URLEncode(const AString & a_Text) +{ + AString res; + auto len = a_Text.size(); + res.reserve(len); + static const char HEX[] = "0123456789abcdef"; + for (size_t i = 0; i < len; ++i) + { + if (isalnum(a_Text[i])) { - lo = lo - 'a' + 10; + res.push_back(a_Text[i]); } - else if ((lo >= 'A') && (lo <= 'F')) + else if (a_Text[i] == ' ') { - lo = lo - 'A' + 10; + res.push_back('+'); } else { - res.push_back(ch); - continue; + res.push_back('%'); + res.push_back(HEX[static_cast(a_Text[i]) >> 4]); + res.push_back(HEX[static_cast(a_Text[i]) & 0x0f]); } - res.push_back(static_cast((hi << 4) | lo)); - i += 2; - } // for i - a_String[] + } return res; } @@ -907,6 +1003,16 @@ short GetBEShort(const char * a_Mem) +unsigned short GetBEUShort(const char * a_Mem) +{ + const Byte * Bytes = reinterpret_cast(a_Mem); + return static_cast((Bytes[0] << 8) | Bytes[1]); +} + + + + + int GetBEInt(const char * a_Mem) { const Byte * Bytes = reinterpret_cast(a_Mem); diff --git a/src/StringUtils.h b/src/StringUtils.h index 8c1925115..e2be2b9c0 100644 --- a/src/StringUtils.h +++ b/src/StringUtils.h @@ -85,6 +85,9 @@ extern void ReplaceString(AString & iHayStack, const AString & iNeedle, const AS /** Converts a stream of BE shorts into UTF-8 string; returns a_UTF8. */ extern AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8); +/** Converts a unicode character to its UTF8 representation. */ +extern AString UnicodeCharToUtf8(unsigned a_UnicodeChar); + /** Converts a UTF-8 string into a UTF-16 BE string. */ extern std::u16string UTF8ToRawBEUTF16(const AString & a_String); @@ -98,8 +101,13 @@ extern AString EscapeString(const AString & a_Message); // tolua_export /** Removes all control codes used by MC for colors and styles. */ extern AString StripColorCodes(const AString & a_Message); // tolua_export -/** URL-Decodes the given string, replacing all "%HH" into the correct characters. Invalid % sequences are left intact */ -extern AString URLDecode(const AString & a_String); // Cannot export to Lua automatically - would generated an extra return value +/** URL-Decodes the given string. +The first value specifies whether the decoding was successful. +The second value is the decoded string, if successful. */ +extern std::pair URLDecode(const AString & a_String); // Exported to Lua as cUrlParser::UrlDecode() + +/** URL-encodes the given string. */ +extern AString URLEncode(const AString & a_Text); /** Replaces all occurrences of char a_From inside a_String with char a_To. */ extern AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To); // Needn't export to Lua, since Lua doesn't have chars anyway @@ -113,6 +121,9 @@ extern AString Base64Encode(const AString & a_Input); // Exported manually due /** Reads two bytes from the specified memory location and interprets them as BigEndian short */ extern short GetBEShort(const char * a_Mem); +/** Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short */ +extern unsigned short GetBEUShort(const char * a_Mem); + /** Reads four bytes from the specified memory location and interprets them as BigEndian int */ extern int GetBEInt(const char * a_Mem); diff --git a/src/WebAdmin.cpp b/src/WebAdmin.cpp index 5c08deb0d..1e60b7c0e 100644 --- a/src/WebAdmin.cpp +++ b/src/WebAdmin.cpp @@ -602,30 +602,7 @@ AString cWebAdmin::GetHTMLEscapedString(const AString & a_Input) AString cWebAdmin::GetURLEncodedString(const AString & a_Input) { - // Translation table from nibble to hex: - static const char Hex[] = "0123456789abcdef"; - - // Preallocate the output to match input: - AString dst; - size_t len = a_Input.length(); - dst.reserve(len); - - // Loop over input and substitute whatever is needed: - for (size_t i = 0; i < len; i++) - { - char ch = a_Input[i]; - if (isalnum(ch) || (ch == '-') || (ch == '_') || (ch == '.') || (ch == '~')) - { - dst.push_back(ch); - } - else - { - dst.push_back('%'); - dst.push_back(Hex[(ch >> 4) & 0x0f]); - dst.push_back(Hex[ch & 0x0f]); - } - } // for i - a_Input[] - return dst; + return URLEncode(a_Input); } -- cgit v1.2.3