mirror of
https://github.com/boostorg/parser.git
synced 2026-01-19 04:22:13 +00:00
@@ -57,11 +57,9 @@ namespace boost::parser::detail {
|
||||
return out;
|
||||
} else {
|
||||
// Skip [0x41, 0x5a), handled above.
|
||||
auto const first =
|
||||
detail::text::detail::begin(mapping_ranges) + 1;
|
||||
auto const first = text::detail::begin(mapping_ranges) + 1;
|
||||
// 7th entry starts with 0x100.
|
||||
auto const last =
|
||||
detail::text::detail::begin(mapping_ranges) + 7;
|
||||
auto const last = text::detail::begin(mapping_ranges) + 7;
|
||||
if (auto out_opt = do_short_mapping(first, last, cp, out))
|
||||
return *out_opt;
|
||||
}
|
||||
@@ -71,8 +69,8 @@ namespace boost::parser::detail {
|
||||
|
||||
// Single-cp-mapping path (next most common case).
|
||||
{
|
||||
auto const first = detail::text::detail::begin(mapping_ranges);
|
||||
auto const last = detail::text::detail::end(mapping_ranges);
|
||||
auto const first = text::detail::begin(mapping_ranges);
|
||||
auto const last = text::detail::end(mapping_ranges);
|
||||
if (auto out_opt = do_short_mapping(first, last, cp, out))
|
||||
return *out_opt;
|
||||
}
|
||||
@@ -95,8 +93,8 @@ namespace boost::parser::detail {
|
||||
return std::copy(
|
||||
it->mapping_,
|
||||
std::find(
|
||||
detail::text::detail::begin(it->mapping_),
|
||||
detail::text::detail::end(it->mapping_),
|
||||
text::detail::begin(it->mapping_),
|
||||
text::detail::end(it->mapping_),
|
||||
0),
|
||||
out);
|
||||
#endif
|
||||
@@ -106,7 +104,6 @@ namespace boost::parser::detail {
|
||||
*out++ = cp;
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
1228
include/boost/parser/detail/text/trie_map.hpp
Normal file
1228
include/boost/parser/detail/text/trie_map.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
#include <boost/parser/detail/printing.hpp>
|
||||
|
||||
#include <boost/parser/detail/text/algorithm.hpp>
|
||||
#include <boost/parser/detail/text/trie.hpp>
|
||||
#include <boost/parser/detail/text/trie_map.hpp>
|
||||
#include <boost/parser/detail/text/unpack.hpp>
|
||||
|
||||
#include <type_traits>
|
||||
@@ -931,26 +931,6 @@ namespace boost { namespace parser {
|
||||
return *context.callbacks_;
|
||||
}
|
||||
|
||||
template<typename Context, typename T>
|
||||
decltype(auto) get_trie(
|
||||
Context const & context, symbol_parser<T> const & symbol_parser)
|
||||
{
|
||||
using trie_t = text::trie<std::vector<char32_t>, T>;
|
||||
symbol_table_tries_t & symbol_table_tries =
|
||||
*context.symbol_table_tries_;
|
||||
std::any & a = symbol_table_tries[(void *)&symbol_parser];
|
||||
if (!a.has_value()) {
|
||||
a = trie_t{};
|
||||
trie_t & trie = *std::any_cast<trie_t>(&a);
|
||||
for (auto const & e : symbol_parser.initial_elements()) {
|
||||
trie.insert(e.first | text::as_utf32, e.second);
|
||||
}
|
||||
return trie;
|
||||
} else {
|
||||
return *std::any_cast<trie_t>(&a);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Type traits.
|
||||
|
||||
@@ -1512,10 +1492,9 @@ namespace boost { namespace parser {
|
||||
using case_fold_array_t = std::array<char32_t, detail::longest_mapping>;
|
||||
|
||||
template<typename I, typename S>
|
||||
struct no_case_iter : stl_interfaces::iterator_interface<
|
||||
struct no_case_iter : stl_interfaces::proxy_iterator_interface<
|
||||
no_case_iter<I, S>,
|
||||
std::forward_iterator_tag,
|
||||
char32_t,
|
||||
char32_t>
|
||||
{
|
||||
no_case_iter() : it_(), last_(), idx_(0), last_idx_() {}
|
||||
@@ -1545,10 +1524,9 @@ namespace boost { namespace parser {
|
||||
return lhs.it_ == rhs.it_ && lhs.idx_ == rhs.idx_;
|
||||
}
|
||||
|
||||
using base_type = stl_interfaces::iterator_interface<
|
||||
using base_type = stl_interfaces::proxy_iterator_interface<
|
||||
no_case_iter<I, S>,
|
||||
std::forward_iterator_tag,
|
||||
char32_t,
|
||||
char32_t>;
|
||||
using base_type::operator++;
|
||||
|
||||
@@ -1573,6 +1551,66 @@ namespace boost { namespace parser {
|
||||
int last_idx_;
|
||||
};
|
||||
|
||||
template<typename V>
|
||||
struct case_fold_view
|
||||
{
|
||||
using iterator =
|
||||
no_case_iter<detail::iterator_t<V>, detail::sentinel_t<V>>;
|
||||
|
||||
case_fold_view(V base) : base_(std::move(base)) {}
|
||||
|
||||
iterator begin() const
|
||||
{
|
||||
return iterator(
|
||||
text::detail::begin(base_), text::detail::end(base_));
|
||||
}
|
||||
auto end() const { return text::detail::end(base_); }
|
||||
|
||||
private:
|
||||
V base_;
|
||||
};
|
||||
|
||||
template<typename Context, typename T>
|
||||
auto get_trie(
|
||||
Context const & context, symbol_parser<T> const & symbol_parser)
|
||||
{
|
||||
using trie_t = text::trie_map<std::vector<char32_t>, T>;
|
||||
using result_type = std::pair<trie_t &, bool>;
|
||||
symbol_table_tries_t & symbol_table_tries =
|
||||
*context.symbol_table_tries_;
|
||||
|
||||
auto & [any, has_case_folded] =
|
||||
symbol_table_tries[(void *)&symbol_parser];
|
||||
|
||||
bool const needs_case_folded = context.no_case_depth_;
|
||||
|
||||
if (!any.has_value()) {
|
||||
any = trie_t{};
|
||||
has_case_folded = false;
|
||||
trie_t & trie = *std::any_cast<trie_t>(&any);
|
||||
for (auto const & e : symbol_parser.initial_elements()) {
|
||||
trie.insert(e.first | text::as_utf32, e.second);
|
||||
if (needs_case_folded) {
|
||||
trie.insert(
|
||||
case_fold_view(e.first | text::as_utf32), e.second);
|
||||
has_case_folded = true;
|
||||
}
|
||||
}
|
||||
return result_type(trie, has_case_folded);
|
||||
} else {
|
||||
trie_t & trie = *std::any_cast<trie_t>(&any);
|
||||
if (needs_case_folded && !has_case_folded) {
|
||||
trie_t new_trie = trie;
|
||||
for (auto && [key, value] : trie) {
|
||||
new_trie.insert(
|
||||
case_fold_view(key | text::as_utf32), value);
|
||||
}
|
||||
std::swap(new_trie, trie);
|
||||
}
|
||||
return result_type(trie, has_case_folded);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
struct char_subranges<hex_digit_subranges>
|
||||
{
|
||||
@@ -4538,9 +4576,13 @@ namespace boost { namespace parser {
|
||||
parser::detail::text::optional_ref<T>
|
||||
find(Context const & context, std::string_view str) const
|
||||
{
|
||||
parser::detail::text::trie<std::vector<char32_t>, T> & trie_ =
|
||||
detail::get_trie(context, ref());
|
||||
return trie_[str | detail::text::as_utf32];
|
||||
auto [trie, has_case_folded] = detail::get_trie(context, ref());
|
||||
if (context.no_case_depth_) {
|
||||
return trie[detail::case_fold_view(
|
||||
str | detail::text::as_utf32)];
|
||||
} else {
|
||||
return trie[str | detail::text::as_utf32];
|
||||
}
|
||||
}
|
||||
|
||||
/** Inserts an entry consisting of a UTF-8 string `str` to match, and
|
||||
@@ -4549,9 +4591,14 @@ namespace boost { namespace parser {
|
||||
template<typename Context>
|
||||
void insert(Context const & context, std::string_view str, T && x) const
|
||||
{
|
||||
parser::detail::text::trie<std::vector<char32_t>, T> & trie_ =
|
||||
detail::get_trie(context, ref());
|
||||
trie_.insert(str | detail::text::as_utf32, std::move(x));
|
||||
auto [trie, has_case_folded] = detail::get_trie(context, ref());
|
||||
if (context.no_case_depth_) {
|
||||
trie.insert(
|
||||
detail::case_fold_view(str | detail::text::as_utf32),
|
||||
std::move(x));
|
||||
} else {
|
||||
trie.insert(str | detail::text::as_utf32, std::move(x));
|
||||
}
|
||||
}
|
||||
|
||||
/** Erases the entry whose UTF-8 match string is `str` from the copy
|
||||
@@ -4559,9 +4606,13 @@ namespace boost { namespace parser {
|
||||
template<typename Context>
|
||||
void erase(Context const & context, std::string_view str) const
|
||||
{
|
||||
parser::detail::text::trie<std::vector<char32_t>, T> & trie_ =
|
||||
detail::get_trie(context, ref());
|
||||
trie_.erase(str | detail::text::as_utf32);
|
||||
auto [trie, has_case_folded] = detail::get_trie(context, ref());
|
||||
if (context.no_case_depth_) {
|
||||
trie.erase(
|
||||
detail::case_fold_view(str | detail::text::as_utf32));
|
||||
} else {
|
||||
trie.erase(str | detail::text::as_utf32);
|
||||
}
|
||||
}
|
||||
|
||||
template<
|
||||
@@ -4600,12 +4651,14 @@ namespace boost { namespace parser {
|
||||
[[maybe_unused]] auto _ = detail::scoped_trace(
|
||||
*this, first, last, context, flags, retval);
|
||||
|
||||
parser::detail::text::trie<std::vector<char32_t>, T> const & trie_ =
|
||||
detail::get_trie(context, ref());
|
||||
auto const lookup = trie_.longest_match(first, last);
|
||||
auto [trie, _0] = detail::get_trie(context, ref());
|
||||
auto const lookup = context.no_case_depth_
|
||||
? trie.longest_match(detail::case_fold_view(
|
||||
BOOST_PARSER_SUBRANGE(first, last)))
|
||||
: trie.longest_match(first, last);
|
||||
if (lookup.match) {
|
||||
std::advance(first, lookup.size);
|
||||
detail::assign(retval, T{*trie_[lookup]});
|
||||
detail::assign(retval, T{*trie[lookup]});
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ namespace boost { namespace parser {
|
||||
};
|
||||
|
||||
using symbol_table_tries_t =
|
||||
std::map<void *, std::any, std::less<void *>>;
|
||||
std::map<void *, std::pair<std::any, bool>, std::less<void *>>;
|
||||
|
||||
template<
|
||||
bool DoTrace,
|
||||
|
||||
@@ -168,6 +168,97 @@ TEST(no_case, match_any_within_string)
|
||||
}
|
||||
}
|
||||
|
||||
// Checks symbol-table lookups under no_case[]: first against tables that are
// never modified, then against a table that gains an entry during the parse.
TEST(no_case, symbol_table)
{
    // Tables that are not mutated: upper- and lower-case input must both
    // produce the stored attribute.
    {
        symbols<int> const roman_numerals = {
            {"I", 1}, {"V", 5}, {"X", 10}, {"L", 50}, {"C", 100}};
        symbols<std::string> const named_strings = {
            {"I", "1"}, {"V", "5"}, {"X", "10"}, {"L", "50"}, {"C", "100"}};

        // int-valued table
        {
            auto const parsed = parse("I", no_case[roman_numerals]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 1);
        }
        {
            auto const parsed = parse("i", no_case[roman_numerals]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 1);
        }
        {
            auto const parsed = parse("L", no_case[roman_numerals]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 50);
        }
        {
            auto const parsed = parse("l", no_case[roman_numerals]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 50);
        }

        // string-valued table
        {
            auto const parsed = parse("I", no_case[named_strings]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, "1");
        }
        {
            auto const parsed = parse("i", no_case[named_strings]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, "1");
        }
        {
            auto const parsed = parse("L", no_case[named_strings]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, "50");
        }
        {
            auto const parsed = parse("l", no_case[named_strings]);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, "50");
        }
    }

    // A table mutated mid-parse: the semantic action inserts a new numeral,
    // which the trailing no_case[] lookup must then find in either case.
    {
        symbols<int> roman_numerals;
        roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);

        auto const add_numeral = [&roman_numerals](auto & ctx) {
            using namespace boost::parser::literals;
            // Single-character, null-terminated key built from the parsed
            // char.
            char key[2] = {get(_attr(ctx), 0_c), 0};
            roman_numerals.insert(ctx, key, get(_attr(ctx), 1_c));
        };
        auto const numerals_parser = omit[roman_numerals] >>
                                     (char_ >> int_)[add_numeral] >>
                                     no_case[roman_numerals];

        {
            auto const parsed = parse("VL50L", numerals_parser);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 50);
            // The entry added during that parse is not seen by a new parse.
            EXPECT_FALSE(parse("L", roman_numerals));
        }
        {
            auto const parsed = parse("VL50l", numerals_parser);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 50);
            EXPECT_FALSE(parse("L", roman_numerals));
        }
        {
            auto const parsed = parse("VC100C", numerals_parser);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 100);
            EXPECT_FALSE(parse("C", roman_numerals));
        }
        {
            auto const parsed = parse("Vc100C", numerals_parser);
            EXPECT_TRUE(parsed);
            EXPECT_EQ(*parsed, 100);
            EXPECT_FALSE(parse("C", roman_numerals));
        }
    }
}
|
||||
|
||||
constexpr auto capital_sharp_s = u8"ẞ"; // U+1E9E
|
||||
constexpr auto small_sharp_s = u8"ß"; // U+00DF
|
||||
constexpr auto double_s = u8"sS"; // U+0073 U+0053
|
||||
|
||||
Reference in New Issue
Block a user