From 38394f36d78f22ca75acf8275e86d25faecd0e8d Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 1 Oct 2023 16:21:23 -0400 Subject: gdbserver: use numeric character references for unicode --- src/common/string_util.cpp | 5 +++++ src/common/string_util.h | 1 + src/core/debugger/gdbstub.cpp | 17 +++++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index feab1653d..4c7aba3f5 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) { return convert.from_bytes(input.data(), input.data() + input.size()); } +std::u32string UTF8ToUTF32(std::string_view input) { + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert; + return convert.from_bytes(input.data(), input.data() + input.size()); +} + #ifdef _WIN32 static std::wstring CPToUTF16(u32 code_page, std::string_view input) { const auto size = diff --git a/src/common/string_util.h b/src/common/string_util.h index c351f1a0c..9da1ca4e9 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _ [[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input); [[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input); +[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input); #ifdef _WIN32 [[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input); diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp index e55831f27..82964f0a1 100644 --- a/src/core/debugger/gdbstub.cpp +++ b/src/core/debugger/gdbstub.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <atomic> +#include <codecvt> +#include <locale> #include <numeric> #include <optional> #include <thread> @@ -12,6 +14,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "common/string_util.h" #include "core/arm/arm_interface.h" #include
"core/core.h" #include "core/debugger/gdbstub.h" @@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) { } static std::string EscapeXML(std::string_view data) { + std::u32string converted = U"[Encoding error]"; + try { + converted = Common::UTF8ToUTF32(data); + } catch (std::range_error&) { + } + std::string escaped; escaped.reserve(data.size()); - for (char c : data) { + for (char32_t c : converted) { switch (c) { case '&': escaped += "&amp;"; @@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) { escaped += "&gt;"; break; default: - escaped += c; + if (c > 0x7f) { + escaped += fmt::format("&#{};", static_cast<u32>(c)); + } else { + escaped += static_cast<char>(c); + } break; } } -- cgit v1.2.3