2
0
mirror of https://github.com/boostorg/parser.git synced 2026-01-19 04:22:13 +00:00

Add insert/erase/clear operations on symbol tables for the current parse and

subsequent parses both. Add better tests for the added API and the previous
subset of the operations already present. Fix errors revealed by the tests.

Fixes #183.
This commit is contained in:
Zach Laine
2024-09-30 16:14:22 -05:00
parent 41fb3997ba
commit 1e64b6e416
3 changed files with 369 additions and 17 deletions

View File

@@ -1609,17 +1609,66 @@ namespace boost { namespace parser {
V base_;
};
enum class symbol_table_op { insert, erase, clear };
template<typename T>
struct symbol_table_operation
{
std::string key_;
std::optional<T> value_;
symbol_table_op kind_;
};
template<typename T>
void apply_symbol_table_operations(
std::vector<std::pair<std::string, T>> & initial_elements,
std::vector<symbol_table_operation<T>> & pending_operations)
{
auto lower_bound = [&initial_elements](std::string const & str) {
return std::lower_bound(
initial_elements.begin(),
initial_elements.end(),
str,
[&str](auto const & a, auto b) {
return a.first < b;
});
};
for (auto & op : pending_operations) {
if (op.kind_ == symbol_table_op::insert) {
auto it = lower_bound(op.key_);
if (it == initial_elements.end() ||
it->first != op.key_) {
initial_elements.insert(
it,
std::pair<std::string, T>(
std::move(op.key_), std::move(*op.value_)));
} else {
it->second = std::move(*op.value_);
}
} else if (op.kind_ == symbol_table_op::erase) {
auto it = lower_bound(op.key_);
if (it != initial_elements.end() && it->first == op.key_)
initial_elements.erase(it);
} else {
initial_elements.clear();
}
}
pending_operations.clear();
}
template<typename Context, typename T>
auto get_trie(
Context const & context, symbol_parser<T> const & symbol_parser)
Context const & context, symbol_parser<T> const & sym_parser)
{
using trie_t = text::trie_map<std::vector<char32_t>, T>;
using result_type = std::pair<trie_t &, bool>;
symbol_table_tries_t & symbol_table_tries =
*context.symbol_table_tries_;
auto & [any, has_case_folded] =
symbol_table_tries[(void *)&symbol_parser];
auto & [any, has_case_folded, pending_ops] =
symbol_table_tries[(void *)&sym_parser.ref()];
bool const needs_case_folded = context.no_case_depth_;
@@ -1627,7 +1676,13 @@ namespace boost { namespace parser {
any = trie_t{};
has_case_folded = false;
trie_t & trie = *std::any_cast<trie_t>(&any);
for (auto const & e : symbol_parser.initial_elements()) {
if (pending_ops) {
detail::apply_symbol_table_operations(
sym_parser.initial_elements(),
sym_parser.pending_operations());
pending_ops = false;
}
for (auto const & e : sym_parser.initial_elements()) {
trie.insert(e.first | text::as_utf32, e.second);
if (needs_case_folded) {
trie.insert(
@@ -2153,6 +2208,67 @@ namespace boost { namespace parser {
Iter & it_;
};
template<typename Parser>
using has_parser_data_member_expr =
decltype(std::declval<Parser>().parser_);
template<typename Parser>
constexpr bool has_parser_data_member_v =
is_detected_v<has_parser_data_member_expr, Parser>;
template<typename Parser>
using has_parsers_data_member_expr =
decltype(std::declval<Parser>().parsers_);
template<typename Parser>
constexpr bool has_parsers_data_member_v =
is_detected_v<has_parsers_data_member_expr, Parser>;
template<typename Context, typename Parser>
void visit_symbol_table_parsers_impl(
Context & context, Parser const & p);
template<typename Context, typename Parser>
void visit_symbol_table_parser(Context & context, Parser const & p)
{
if constexpr (has_parser_data_member_v<Parser>) {
detail::visit_symbol_table_parsers_impl(context, p.parser_);
}
}
template<typename Context, typename T>
void visit_symbol_table_parser(
Context & context, symbol_parser<T> const & p)
{
auto & [_, has_case_folded, pending_ops] =
(*context.symbol_table_tries_)[(void *)&p.ref()];
has_case_folded = false;
pending_ops = !p.pending_operations().empty();
}
template<typename Context, typename Parser>
void visit_symbol_table_parsers_impl(
Context & context, Parser const & p)
{
if constexpr (has_parsers_data_member_v<Parser>) {
auto visit = [&context](auto & parser) {
detail::visit_symbol_table_parsers_impl(context, parser);
};
detail::hl::for_each(p.parsers_, visit);
} else {
detail::visit_symbol_table_parser(context, p);
}
}
template<
typename Context,
typename Parser,
typename GlobalState,
typename ErrorHandler>
void visit_symbol_table_parsers(
Context & context,
parser_interface<Parser, GlobalState, ErrorHandler> const & p)
{
detail::visit_symbol_table_parsers_impl(context, p.parser_);
}
template<
bool Debug,
typename Iter,
@@ -2179,6 +2295,7 @@ namespace boost { namespace parser {
error_handler,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::flags::gen_attrs)
: detail::flags::gen_attrs;
@@ -2227,6 +2344,7 @@ namespace boost { namespace parser {
error_handler,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::flags::gen_attrs)
: detail::flags::gen_attrs;
@@ -2281,6 +2399,7 @@ namespace boost { namespace parser {
callbacks,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::flags::gen_attrs)
: detail::flags::gen_attrs;
@@ -2332,6 +2451,7 @@ namespace boost { namespace parser {
error_handler,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::default_flags())
: detail::default_flags();
@@ -2377,6 +2497,7 @@ namespace boost { namespace parser {
error_handler,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::default_flags())
: detail::default_flags();
@@ -2430,6 +2551,7 @@ namespace boost { namespace parser {
callbacks,
parser.globals_,
symbol_table_tries);
detail::visit_symbol_table_parsers(context, parser);
auto const flags =
Debug ? detail::enable_trace(detail::default_flags())
: detail::default_flags();
@@ -4976,6 +5098,15 @@ namespace boost { namespace parser {
}
}
/** Erases the entry whose UTF-8 match string is `str` from the copy
of the symbol table inside the parse context `context`. */
template<typename Context>
void clear(Context const & context) const
{
auto [trie, _] = detail::get_trie(context, ref());
trie.clear();
}
template<
typename Iter,
typename Sentinel,
@@ -5025,7 +5156,9 @@ namespace boost { namespace parser {
}
}
std::vector<std::pair<std::string_view, T>> initial_elements_;
mutable std::vector<std::pair<std::string, T>> initial_elements_;
mutable std::vector<detail::symbol_table_operation<T>>
pending_operations_;
symbol_parser const * copied_from_;
symbol_parser const & ref() const noexcept
@@ -5034,11 +5167,16 @@ namespace boost { namespace parser {
return *copied_from_;
return *this;
}
std::vector<std::pair<std::string_view, T>> const &
std::vector<std::pair<std::string, T>> &
initial_elements() const noexcept
{
return ref().initial_elements_;
}
std::vector<detail::symbol_table_operation<T>> &
pending_operations() const noexcept
{
return ref().pending_operations_;
}
std::string_view diagnostic_text_;
};
@@ -5598,7 +5736,9 @@ namespace boost { namespace parser {
{}
symbols(std::initializer_list<std::pair<std::string_view, T>> il)
{
this->parser_.initial_elements_ = il;
this->parser_.initial_elements_.resize(il.size());
std::copy(il.begin(), il.end(),
this->parser_.initial_elements_.begin());
}
symbols(
char const * diagnostic_text,
@@ -5606,22 +5746,49 @@ namespace boost { namespace parser {
parser_interface<symbol_parser<T>>(
symbol_parser<T>(diagnostic_text))
{
this->parser_.initial_elements_ = il;
this->parser_.initial_elements_.resize(il.size());
std::copy(il.begin(), il.end(),
this->parser_.initial_elements_.begin());
}
using parser_interface<symbol_parser<T>>::operator();
/** Adds an entry consisting of a UTF-8 string `str` to match, and an
associated attribute `x`, to `*this`. The entry is added for use
in all subsequent top-level parses. Subsequent lookups during the
current top-level parse will not match `str`. */
/** Inserts an entry consisting of a UTF-8 string `str` to match, and
an associated attribute `x`, to `*this`. The entry is added for
use in all subsequent top-level parses. Subsequent lookups during
the current top-level parse will not necessarily match `str`. */
symbols & insert_for_next_parse(std::string_view str, T x)
{
this->parser_.initial_elements_.push_back(
std::pair<std::string_view, T>(str, std::move(x)));
this->parser_.pending_operations().push_back(
detail::symbol_table_operation<T>{
std::string(str),
std::move(x),
detail::symbol_table_op::insert});
return *this;
}
/** Erases the entry whose UTF-8 match string is `str`, from `*this`.
The entry will no longer be available for use in all subsequent
top-level parses. `str` will not be removed from the symbols
matched in the current top-level parse. */
void erase_for_next_parse(std::string_view str)
{
this->parser_.pending_operations().push_back(
detail::symbol_table_operation<T>{
std::string(str),
std::nullopt,
detail::symbol_table_op::erase});
}
/** Erases all the entries from the copy of the symbol table inside
the parse context `context`. */
void clear_for_next_parse()
{
this->parser_.pending_operations().push_back(
detail::symbol_table_operation<T>{
{}, std::nullopt, detail::symbol_table_op::clear});
}
/** Equivalent to `insert_for_next_parse(str, std::move(x))`. */
symbols & operator()(std::string_view str, T x)
{
@@ -5655,7 +5822,15 @@ namespace boost { namespace parser {
{
this->parser_.erase(context, str);
}
};
/** Erases all the entries from the copy of the symbol table inside
the parse context `context`. */
template<typename Context>
void clear(Context const & context) const
{
this->parser_.clear(context);
}
};
#ifndef BOOST_PARSER_DOXYGEN

View File

@@ -80,8 +80,15 @@ namespace boost { namespace parser {
in_apply_parser = 1 << 3
};
struct symbol_table_trie_element
{
std::any trie_;
bool has_case_folded_;
bool pending_operations_;
};
using symbol_table_tries_t =
std::map<void *, std::pair<std::any, bool>, std::less<void *>>;
std::map<void *, symbol_table_trie_element, std::less<void *>>;
template<
bool DoTrace,

View File

@@ -12,7 +12,6 @@ using namespace boost::parser;
int main()
{
// symbols_empty
{
symbols<int> roman_numerals;
@@ -164,5 +163,176 @@ int main()
}
}
// insert/erase/clear
{
symbols<int> roman_numerals;
roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);
auto const insert_numeral = [&roman_numerals](auto & context) {
using namespace boost::parser::literals;
char chars[2] = {get(_attr(context), 0_c), 0};
roman_numerals.insert(context, chars, get(_attr(context), 1_c));
};
auto const erase_numeral = [&roman_numerals](auto & context) {
using namespace boost::parser::literals;
char chars[2] = {_attr(context), 0};
roman_numerals.erase(context, chars);
};
auto const clear_numerals = [&roman_numerals](auto & context) {
roman_numerals.clear(context);
};
auto const add_parser = ("add" >> char_ >> int_)[insert_numeral];
auto const delete_parser = ("del" >> char_)[erase_numeral];
auto const clear_parser = lit("clear")[clear_numerals];
auto const next_insert_numeral = [&roman_numerals](auto & context) {
using namespace boost::parser::literals;
char chars[2] = {get(_attr(context), 0_c), 0};
roman_numerals.insert_for_next_parse(chars, get(_attr(context), 1_c));
};
auto const next_erase_numeral = [&roman_numerals](auto & context) {
using namespace boost::parser::literals;
char chars[2] = {_attr(context), 0};
roman_numerals.erase_for_next_parse(chars);
};
auto const next_clear_numerals = [&roman_numerals](auto &) {
roman_numerals.clear_for_next_parse();
};
auto const next_add_parser =
("next-add" >> char_ >> int_)[next_insert_numeral];
auto const next_delete_parser = ("next-del" >> char_)[next_erase_numeral];
auto const next_clear_parser = lit("next-clear")[next_clear_numerals];
{
// add only for this parse
auto result = parse("addL50L", add_parser >> roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
result = parse("L", roman_numerals);
BOOST_TEST(!result);
}
{
// add only for the next parse
auto result = parse("next-addL50L", next_add_parser >> roman_numerals);
BOOST_TEST(!result); // TODO
result = parse("L", roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
}
{
// add for both parses
auto result = parse("addL50next-addL50L",
add_parser >> next_add_parser >> roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
result = parse("L", roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
}
{
// add for both parses
auto result = parse("next-addL50addL50L",
next_add_parser >> add_parser >> roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
result = parse("L", roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
}
{
// delete nonexistent entry
auto result = parse("delQ", delete_parser);
BOOST_TEST(result);
}
{
// delete nonexistent entry
auto result = parse("next-delQ", next_delete_parser);
BOOST_TEST(result);
}
{
// delete only for this parse
BOOST_TEST(*parse("V", roman_numerals) == 5);
auto result = parse("delVV", delete_parser >> roman_numerals);
BOOST_TEST(!result);
result = parse("V", roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 5);
}
{
// delete only for the next parse
BOOST_TEST(*parse("V", roman_numerals) == 5);
auto result = parse("next-delVV", next_delete_parser >> roman_numerals);
BOOST_TEST(result); // TODO
BOOST_TEST(*result == 5); // TODO
result = parse("V", roman_numerals);
BOOST_TEST(!result);
}
{
// delete for both parses
BOOST_TEST(*parse("X", roman_numerals) == 10);
auto result = parse(
"delXnext-delXX",
delete_parser >> next_delete_parser >> roman_numerals);
BOOST_TEST(!result);
result = parse("X", roman_numerals);
BOOST_TEST(!result);
}
{
// add and then delete only for this parse
auto result = parse(
"addC100CdelCC",
add_parser >> roman_numerals >> delete_parser >> roman_numerals);
BOOST_TEST(!result);
}
{
// add for this parse and then delete only for the next parse
auto result = parse(
"addC100Cnext-delCC",
add_parser >> roman_numerals >> next_delete_parser >>
roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == std::tuple(100, 100));
}
{
// clear only for this parse
BOOST_TEST(*parse("I", roman_numerals) == 1);
BOOST_TEST(*parse("L", roman_numerals) == 50);
BOOST_TEST(!parse("clearI", clear_parser >> roman_numerals));
BOOST_TEST(!parse("clearL", clear_parser >> roman_numerals));
BOOST_TEST(*parse("I", roman_numerals) == 1);
BOOST_TEST(*parse("L", roman_numerals) == 50);
}
{
// clear only for the next parse
BOOST_TEST(*parse("I", roman_numerals) == 1);
BOOST_TEST(*parse("L", roman_numerals) == 50);
auto result = parse("next-clearI", next_clear_parser >> roman_numerals);
BOOST_TEST(result); // TODO
BOOST_TEST(*result == 1); // TODO
BOOST_TEST(!parse("I", roman_numerals));
BOOST_TEST(!parse("L", roman_numerals));
}
}
return boost::report_errors();
}