mnemonics: fix prefix extraction with non ASCII text
This commit is contained in:
parent
1720affbd6
commit
ea33cadebf
|
@ -99,7 +99,7 @@ namespace
|
||||||
{
|
{
|
||||||
if (has_checksum)
|
if (has_checksum)
|
||||||
{
|
{
|
||||||
trimmed_word = it2->substr(0, (*it1)->get_unique_prefix_length());
|
trimmed_word = Language::utf8prefix(*it2, (*it1)->get_unique_prefix_length());
|
||||||
// Use the trimmed words and map
|
// Use the trimmed words and map
|
||||||
if (trimmed_word_map.count(trimmed_word) == 0)
|
if (trimmed_word_map.count(trimmed_word) == 0)
|
||||||
{
|
{
|
||||||
|
@ -144,7 +144,7 @@ namespace
|
||||||
{
|
{
|
||||||
if (it->length() > unique_prefix_length)
|
if (it->length() > unique_prefix_length)
|
||||||
{
|
{
|
||||||
trimmed_words += it->substr(0, unique_prefix_length);
|
trimmed_words += Language::utf8prefix(*it, unique_prefix_length);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -170,9 +170,9 @@ namespace
|
||||||
|
|
||||||
std::string checksum = seed[create_checksum_index(seed, unique_prefix_length)];
|
std::string checksum = seed[create_checksum_index(seed, unique_prefix_length)];
|
||||||
|
|
||||||
std::string trimmed_checksum = checksum.length() > unique_prefix_length ? checksum.substr(0, unique_prefix_length) :
|
std::string trimmed_checksum = checksum.length() > unique_prefix_length ? Language::utf8prefix(checksum, unique_prefix_length) :
|
||||||
checksum;
|
checksum;
|
||||||
std::string trimmed_last_word = last_word.length() > unique_prefix_length ? last_word.substr(0, unique_prefix_length) :
|
std::string trimmed_last_word = last_word.length() > unique_prefix_length ? Language::utf8prefix(last_word, unique_prefix_length) :
|
||||||
last_word;
|
last_word;
|
||||||
return trimmed_checksum == trimmed_last_word;
|
return trimmed_checksum == trimmed_last_word;
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,26 @@
|
||||||
*/
|
*/
|
||||||
namespace Language
|
namespace Language
|
||||||
{
|
{
|
||||||
|
/*!
 * \brief Returns a string made of (at most) the first count characters in s.
 *
 * A "character" here is one UTF-8 encoded code point: a lead byte followed by
 * any number of continuation bytes (bytes of the form 10xxxxxx).
 * Assumes well formedness. No check is made for this.
 *
 * The scan is bounded by s.size() rather than by a NUL terminator, so a
 * string containing embedded '\0' bytes is not cut short.
 *
 * \param  s      The string from which to return the first count characters.
 * \param  count  How many characters to return.
 * \return        A string consisting of the first count characters in s,
 *                or all of s if it holds fewer than count characters.
 */
// NOTE(review): marked inline because this definition appears to live in a
// header; a non-inline definition there would violate the ODR as soon as the
// header is included from more than one translation unit.
inline std::string utf8prefix(const std::string &s, size_t count)
{
  const size_t size = s.size();
  size_t pos = 0;
  for (; count > 0 && pos < size; --count)
  {
    // Consume the lead byte of the current character...
    ++pos;
    // ...then every continuation byte (10xxxxxx) that belongs to it.
    while (pos < size && (static_cast<unsigned char>(s[pos]) & 0xc0) == 0x80)
      ++pos;
  }
  // pos is now the byte length of the requested prefix; copy it in one go.
  return s.substr(0, pos);
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \class Base
|
* \class Base
|
||||||
* \brief A base language class which all languages have to inherit from for
|
* \brief A base language class which all languages have to inherit from for
|
||||||
|
@ -70,7 +90,7 @@ namespace Language
|
||||||
(*word_map)[*it] = ii;
|
(*word_map)[*it] = ii;
|
||||||
if (it->length() > unique_prefix_length)
|
if (it->length() > unique_prefix_length)
|
||||||
{
|
{
|
||||||
(*trimmed_word_map)[it->substr(0, unique_prefix_length)] = ii;
|
(*trimmed_word_map)[utf8prefix(*it, unique_prefix_length)] = ii;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue