From eeca5ca0c8d7275069d45adf40546ea83edf46cc Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 30 Apr 2019 19:16:11 +0000 Subject: [PATCH 1/2] epee: support unicode in parsed strings --- .../include/storages/parserse_base_utils.h | 59 +++++++++++++++++++ contrib/epee/include/string_tools.h | 24 +------- tests/unit_tests/epee_utils.cpp | 17 ++++++ 3 files changed, 78 insertions(+), 22 deletions(-) diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h index b5c4138c5..fe53628a5 100644 --- a/contrib/epee/include/storages/parserse_base_utils.h +++ b/contrib/epee/include/storages/parserse_base_utils.h @@ -31,6 +31,9 @@ #include #include +#undef MONERO_DEFAULT_LOG_CATEGORY +#define MONERO_DEFAULT_LOG_CATEGORY "serialization" + namespace epee { namespace misc_utils @@ -62,6 +65,26 @@ namespace misc_utils 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; + static const constexpr unsigned char isx[256] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + inline bool isspace(char c) { return lut[(uint8_t)c] & 8; @@ -162,6 +185,42 @@ namespace misc_utils val.push_back('\\');break; case '/': //Slash character val.push_back('/');break; + case 'u': //Unicode code point + if (buf_end - it < 4) + { + ASSERT_MES_AND_THROW("Invalid Unicode escape sequence"); + } + else + { + uint32_t dst = 0; + for (int i = 0; i < 4; ++i) + { + const unsigned char tmp = isx[(int)*++it]; + CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding"); + dst = dst << 4 | tmp; + } + // encode as UTF-8 + if (dst <= 0x7f) + { + val.push_back(dst); + } + else if (dst <= 0x7ff) + { + val.push_back(0xc0 | (dst >> 6)); + val.push_back(0x80 | (dst & 0x3f)); + } + else if (dst <= 0xffff) + { + val.push_back(0xe0 | (dst >> 12)); + val.push_back(0x80 | ((dst >> 6) & 0x3f)); + val.push_back(0x80 | (dst & 0x3f)); + } + else + { + ASSERT_MES_AND_THROW("Unicode code point is out or range"); + } + } + break; default: val.push_back(*it); LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\""); diff --git a/contrib/epee/include/string_tools.h b/contrib/epee/include/string_tools.h index da47b7d55..1be5eb5e1 100644 --- a/contrib/epee/include/string_tools.h +++ b/contrib/epee/include/string_tools.h @@ -59,26 +59,6 @@ #pragma comment (lib, "Rpcrt4.lib") #endif -static const constexpr unsigned char isx[256] = -{ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -}; - namespace epee { namespace string_tools @@ -99,10 +79,10 @@ namespace string_tools for(size_t i = 0; i < s.size(); i += 2) { int tmp = *src++; - tmp = isx[tmp]; + tmp = epee::misc_utils::parse::isx[tmp]; if (tmp == 0xff) return false; int t2 = *src++; - t2 = isx[t2]; + t2 = epee::misc_utils::parse::isx[t2]; if (t2 == 0xff) return false; *dst++ = (tmp << 4) | t2; } diff --git a/tests/unit_tests/epee_utils.cpp b/tests/unit_tests/epee_utils.cpp index 946731826..c5f9a42d0 100644 --- a/tests/unit_tests/epee_utils.cpp +++ b/tests/unit_tests/epee_utils.cpp @@ -946,3 +946,20 @@ TEST(parsing, number) epee::misc_utils::parse::match_number(i, s.end(), val); ASSERT_EQ(val, "+9.34e+03"); } + +TEST(parsing, unicode) +{ + std::string bs; + std::string s; + std::string::const_iterator si; + + s = "\"\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, ""); + s = "\"\\u0000\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, std::string(1, '\0')); + s = "\"\\u0020\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, " "); + s = "\"\\u1\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs)); + s = "\"\\u12\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs)); + s = "\"\\u123\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs)); + s = "\"\\u1234\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "ሴ"); + s = "\"foo\\u1234bar\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "fooሴbar"); + s = "\"\\u3042\\u307e\\u3084\\u304b\\u3059\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "あまやかす"); +} From 3e11bb540e1014e7e7c1d8b6cb811ea3b5e87752 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 30 Apr 2019 21:06:13 +0000 Subject: [PATCH 2/2] functional_tests: test creating wallets with local language names --- tests/functional_tests/wallet_address.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/functional_tests/wallet_address.py b/tests/functional_tests/wallet_address.py index 4ff059a6f..eda52b432 100755 --- a/tests/functional_tests/wallet_address.py +++ b/tests/functional_tests/wallet_address.py @@ -198,8 +198,9 @@ class WalletAddressTest(): try: wallet.close_wallet() except: pass languages = res.languages - for language in languages: - print('Creating ' + str(language) + ' wallet') + languages_local = res.languages_local + for language in languages + languages_local: + print('Creating ' + language.encode('utf8') + ' wallet') wallet.create_wallet(filename = '', language = language) res = wallet.query_key('mnemonic') wallet.close_wallet()