Merge pull request #5017
21777daf
epee: speedup word/number matching (moneromooo-monero)
This commit is contained in:
commit
b65106ce93
|
@ -29,6 +29,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
|
||||
namespace epee
|
||||
{
|
||||
|
@ -36,6 +37,40 @@ namespace misc_utils
|
|||
{
|
||||
namespace parse
|
||||
{
|
||||
// 1: digit
|
||||
// 2: .eE (floating point)
|
||||
// 4: alpha
|
||||
// 8: whitespace
|
||||
// 16: allowed in float but doesn't necessarily mean it's a float
|
||||
static const constexpr uint8_t lut[256]={
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, // 16
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32
|
||||
8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 16, 18, 0, // 48
|
||||
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, // 64
|
||||
0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 96
|
||||
0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 112
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 128
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
inline bool isspace(char c)
|
||||
{
|
||||
return lut[(uint8_t)c] & 8;
|
||||
}
|
||||
|
||||
inline bool isdigit(char c)
|
||||
{
|
||||
return lut[(uint8_t)c] & 1;
|
||||
}
|
||||
|
||||
inline std::string transform_to_escape_sequence(const std::string& src)
|
||||
{
|
||||
static const char escaped[] = "\b\f\n\r\t\v\"\\/";
|
||||
|
@ -159,25 +194,34 @@ namespace misc_utils
|
|||
return false;
|
||||
}
|
||||
}
|
||||
inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val, bool& is_float_val, bool& is_signed_val)
|
||||
inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val, bool& is_float_val, bool& is_signed_val)
|
||||
{
|
||||
val.clear();
|
||||
is_float_val = false;
|
||||
for(std::string::const_iterator it = star_end_string;it != buf_end;it++)
|
||||
uint8_t float_flag = 0;
|
||||
is_signed_val = false;
|
||||
size_t chars = 0;
|
||||
std::string::const_iterator it = star_end_string;
|
||||
if (it != buf_end && *it == '-')
|
||||
{
|
||||
if(isdigit(*it) || (it == star_end_string && *it == '-') || (val.size() && *it == '.' ) || (is_float_val && (*it == 'e' || *it == 'E' || *it == '-' || *it == '+' )) )
|
||||
{
|
||||
if(!val.size() && *it == '-')
|
||||
is_signed_val = true;
|
||||
if(*it == '.' )
|
||||
is_float_val = true;
|
||||
val.push_back(*it);
|
||||
++chars;
|
||||
++it;
|
||||
}
|
||||
for(;it != buf_end;it++)
|
||||
{
|
||||
const uint8_t flags = lut[(uint8_t)*it];
|
||||
if (flags & 16)
|
||||
{
|
||||
float_flag |= flags;
|
||||
++chars;
|
||||
}
|
||||
else
|
||||
{
|
||||
val = boost::string_ref(&*star_end_string, chars);
|
||||
if(val.size())
|
||||
{
|
||||
star_end_string = --it;
|
||||
is_float_val = !!(float_flag & 2);
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
@ -186,7 +230,7 @@ namespace misc_utils
|
|||
}
|
||||
ASSERT_MES_AND_THROW("wrong number in json entry: " << std::string(star_end_string, buf_end));
|
||||
}
|
||||
inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
|
||||
inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -199,15 +243,15 @@ namespace misc_utils
|
|||
return false;
|
||||
}
|
||||
}
|
||||
inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
|
||||
inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
|
||||
{
|
||||
val.clear();
|
||||
|
||||
for(std::string::const_iterator it = star_end_string;it != buf_end;it++)
|
||||
{
|
||||
if(!isalpha(*it))
|
||||
if (!(lut[(uint8_t)*it] & 4))
|
||||
{
|
||||
val.assign(star_end_string, it);
|
||||
val = boost::string_ref(&*star_end_string, std::distance(star_end_string, it));
|
||||
if(val.size())
|
||||
{
|
||||
star_end_string = --it;
|
||||
|
@ -218,7 +262,7 @@ namespace misc_utils
|
|||
}
|
||||
ASSERT_MES_AND_THROW("failed to match word number in json entry: " << std::string(star_end_string, buf_end));
|
||||
}
|
||||
inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
|
||||
inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
|
|
@ -39,7 +39,7 @@ namespace epee
|
|||
{
|
||||
namespace json
|
||||
{
|
||||
#define CHECK_ISSPACE() if(!isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));}
|
||||
#define CHECK_ISSPACE() if(!epee::misc_utils::parse::isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));}
|
||||
|
||||
/*inline void parse_error()
|
||||
{
|
||||
|
@ -114,11 +114,11 @@ namespace epee
|
|||
std::string val;
|
||||
match_string2(it, buf_end, val);
|
||||
//insert text value
|
||||
stg.set_value(name, val, current_section);
|
||||
stg.set_value(name, std::move(val), current_section);
|
||||
state = match_state_wonder_after_value;
|
||||
}else if (isdigit(*it) || *it == '-')
|
||||
}else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
|
||||
{//just a named number value started
|
||||
std::string val;
|
||||
boost::string_ref val;
|
||||
bool is_v_float = false;bool is_signed = false;
|
||||
match_number2(it, buf_end, val, is_v_float, is_signed);
|
||||
if(!is_v_float)
|
||||
|
@ -126,27 +126,27 @@ namespace epee
|
|||
if(is_signed)
|
||||
{
|
||||
errno = 0;
|
||||
int64_t nval = strtoll(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
int64_t nval = strtoll(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
stg.set_value(name, nval, current_section);
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
uint64_t nval = strtoull(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
uint64_t nval = strtoull(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
stg.set_value(name, nval, current_section);
|
||||
}
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
double nval = strtod(val.c_str(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
double nval = strtod(val.data(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
stg.set_value(name, nval, current_section);
|
||||
}
|
||||
state = match_state_wonder_after_value;
|
||||
}else if(isalpha(*it) )
|
||||
{// could be null, true or false
|
||||
std::string word;
|
||||
boost::string_ref word;
|
||||
match_word2(it, buf_end, word);
|
||||
if(boost::iequals(word, "null"))
|
||||
{
|
||||
|
@ -203,13 +203,13 @@ namespace epee
|
|||
//mean array of strings
|
||||
std::string val;
|
||||
match_string2(it, buf_end, val);
|
||||
h_array = stg.insert_first_value(name, val, current_section);
|
||||
h_array = stg.insert_first_value(name, std::move(val), current_section);
|
||||
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values entry");
|
||||
state = match_state_array_after_value;
|
||||
array_md = array_mode_string;
|
||||
}else if (isdigit(*it) || *it == '-')
|
||||
}else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
|
||||
{//array of numbers value started
|
||||
std::string val;
|
||||
boost::string_ref val;
|
||||
bool is_v_float = false;bool is_signed_val = false;
|
||||
match_number2(it, buf_end, val, is_v_float, is_signed_val);
|
||||
if(!is_v_float)
|
||||
|
@ -217,22 +217,22 @@ namespace epee
|
|||
if (is_signed_val)
|
||||
{
|
||||
errno = 0;
|
||||
int64_t nval = strtoll(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
int64_t nval = strtoll(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
h_array = stg.insert_first_value(name, nval, current_section);
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
uint64_t nval = strtoull(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
uint64_t nval = strtoull(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
h_array = stg.insert_first_value(name, nval, current_section);
|
||||
}
|
||||
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
double nval = strtod(val.c_str(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
double nval = strtod(val.data(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
h_array = stg.insert_first_value(name, nval, current_section);
|
||||
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
|
||||
}
|
||||
|
@ -245,7 +245,7 @@ namespace epee
|
|||
state = match_state_wonder_after_value;
|
||||
}else if(isalpha(*it) )
|
||||
{// array of booleans
|
||||
std::string word;
|
||||
boost::string_ref word;
|
||||
match_word2(it, buf_end, word);
|
||||
if(boost::iequals(word, "true"))
|
||||
{
|
||||
|
@ -291,15 +291,15 @@ namespace epee
|
|||
{
|
||||
std::string val;
|
||||
match_string2(it, buf_end, val);
|
||||
bool res = stg.insert_next_value(h_array, val);
|
||||
bool res = stg.insert_next_value(h_array, std::move(val));
|
||||
CHECK_AND_ASSERT_THROW_MES(res, "failed to insert values");
|
||||
state = match_state_array_after_value;
|
||||
}else CHECK_ISSPACE();
|
||||
break;
|
||||
case array_mode_numbers:
|
||||
if (isdigit(*it) || *it == '-')
|
||||
if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
|
||||
{//array of numbers value started
|
||||
std::string val;
|
||||
boost::string_ref val;
|
||||
bool is_v_float = false;bool is_signed_val = false;
|
||||
match_number2(it, buf_end, val, is_v_float, is_signed_val);
|
||||
bool insert_res = false;
|
||||
|
@ -308,21 +308,21 @@ namespace epee
|
|||
if (is_signed_val)
|
||||
{
|
||||
errno = 0;
|
||||
int64_t nval = strtoll(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
int64_t nval = strtoll(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
insert_res = stg.insert_next_value(h_array, nval);
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
uint64_t nval = strtoull(val.c_str(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
uint64_t nval = strtoull(val.data(), NULL, 10);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
insert_res = stg.insert_next_value(h_array, nval);
|
||||
}
|
||||
}else
|
||||
{
|
||||
errno = 0;
|
||||
double nval = strtod(val.c_str(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + val);
|
||||
double nval = strtod(val.data(), NULL);
|
||||
if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
|
||||
insert_res = stg.insert_next_value(h_array, nval);
|
||||
}
|
||||
CHECK_AND_ASSERT_THROW_MES(insert_res, "Failed to insert next value");
|
||||
|
@ -333,7 +333,7 @@ namespace epee
|
|||
case array_mode_booleans:
|
||||
if(isalpha(*it) )
|
||||
{// array of booleans
|
||||
std::string word;
|
||||
boost::string_ref word;
|
||||
match_word2(it, buf_end, word);
|
||||
if(boost::iequals(word, "true"))
|
||||
{
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include "p2p/net_peerlist_boost_serialization.h"
|
||||
#include "span.h"
|
||||
#include "string_tools.h"
|
||||
#include "storages/parserse_base_utils.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -833,3 +834,86 @@ TEST(net_buffer, move)
|
|||
ASSERT_TRUE(!memcmp(span.data() + 1, std::string(4000, '0').c_str(), 4000));
|
||||
}
|
||||
|
||||
TEST(parsing, isspace)
|
||||
{
|
||||
ASSERT_FALSE(epee::misc_utils::parse::isspace(0));
|
||||
for (int c = 1; c < 256; ++c)
|
||||
{
|
||||
ASSERT_EQ(epee::misc_utils::parse::isspace(c), strchr("\r\n\t\f\v ", c) != NULL);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(parsing, isdigit)
|
||||
{
|
||||
ASSERT_FALSE(epee::misc_utils::parse::isdigit(0));
|
||||
for (int c = 1; c < 256; ++c)
|
||||
{
|
||||
ASSERT_EQ(epee::misc_utils::parse::isdigit(c), strchr("0123456789", c) != NULL);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(parsing, number)
|
||||
{
|
||||
boost::string_ref val;
|
||||
std::string s;
|
||||
std::string::const_iterator i;
|
||||
|
||||
// the parser expects another character to end the number, and accepts things
|
||||
// that aren't numbers, as it's meant as a pre-filter for strto* functions,
|
||||
// so we just check that numbers get accepted, but don't test non numbers
|
||||
|
||||
s = "0 ";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "0");
|
||||
|
||||
s = "000 ";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "000");
|
||||
|
||||
s = "10x";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "10");
|
||||
|
||||
s = "10.09/";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "10.09");
|
||||
|
||||
s = "-1.r";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "-1.");
|
||||
|
||||
s = "-49.;";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "-49.");
|
||||
|
||||
s = "0.78/";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "0.78");
|
||||
|
||||
s = "33E9$";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "33E9");
|
||||
|
||||
s = ".34e2=";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, ".34e2");
|
||||
|
||||
s = "-9.34e-2=";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "-9.34e-2");
|
||||
|
||||
s = "+9.34e+03=";
|
||||
i = s.begin();
|
||||
epee::misc_utils::parse::match_number(i, s.end(), val);
|
||||
ASSERT_EQ(val, "+9.34e+03");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue