diff --git a/include/boost/parser/detail/case_fold.hpp b/include/boost/parser/detail/case_fold.hpp index 21720078..94bc6d63 100644 --- a/include/boost/parser/detail/case_fold.hpp +++ b/include/boost/parser/detail/case_fold.hpp @@ -57,11 +57,9 @@ namespace boost::parser::detail { return out; } else { // Skip [0x41, 0x5a), handled above. - auto const first = - detail::text::detail::begin(mapping_ranges) + 1; + auto const first = text::detail::begin(mapping_ranges) + 1; // 7th entry starts with 0x100. - auto const last = - detail::text::detail::begin(mapping_ranges) + 7; + auto const last = text::detail::begin(mapping_ranges) + 7; if (auto out_opt = do_short_mapping(first, last, cp, out)) return *out_opt; } @@ -71,8 +69,8 @@ namespace boost::parser::detail { // Single-cp-mapping path (next most common case). { - auto const first = detail::text::detail::begin(mapping_ranges); - auto const last = detail::text::detail::end(mapping_ranges); + auto const first = text::detail::begin(mapping_ranges); + auto const last = text::detail::end(mapping_ranges); if (auto out_opt = do_short_mapping(first, last, cp, out)) return *out_opt; } @@ -95,8 +93,8 @@ namespace boost::parser::detail { return std::copy( it->mapping_, std::find( - detail::text::detail::begin(it->mapping_), - detail::text::detail::end(it->mapping_), + text::detail::begin(it->mapping_), + text::detail::end(it->mapping_), 0), out); #endif @@ -106,7 +104,6 @@ namespace boost::parser::detail { *out++ = cp; return out; } - } #endif diff --git a/include/boost/parser/detail/text/trie_map.hpp b/include/boost/parser/detail/text/trie_map.hpp new file mode 100644 index 00000000..e2c9e986 --- /dev/null +++ b/include/boost/parser/detail/text/trie_map.hpp @@ -0,0 +1,1228 @@ +// Copyright (C) 2020 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_PARSER_DETAIL_TEXT_TRIE_MAP_HPP +#define BOOST_PARSER_DETAIL_TEXT_TRIE_MAP_HPP + +#include + +#include + + +namespace boost::parser::detail { namespace text { + + template + struct trie_map_iterator; + + template + struct const_trie_map_iterator; + + template + using reverse_trie_map_iterator = + stl_interfaces::reverse_iterator>; + + template + using const_reverse_trie_map_iterator = + stl_interfaces::reverse_iterator>; + + /** A range type returned by certain operations on a trie_map or + trie_set. */ + template + struct trie_range + { + using iterator = Iter; + + iterator first; + iterator last; + + iterator begin() const { return first; } + iterator end() const { return last; } + + friend bool operator==(trie_range const & lhs, trie_range const & rhs) + { + return lhs.first == rhs.first && lhs.last == rhs.last; + } + friend bool operator!=(trie_range const & lhs, trie_range const & rhs) + { + return !(lhs == rhs); + } + }; + + /** A constant range type returned by certain operations on a trie_map or + trie_set. */ + template + struct const_trie_range + { + using const_iterator = Iter; + + const_iterator first; + const_iterator last; + + const_iterator begin() const { return first; } + const_iterator end() const { return last; } + + friend bool + operator==(const_trie_range const & lhs, const_trie_range const & rhs) + { + return lhs.first == rhs.first && lhs.last == rhs.last; + } + friend bool + operator!=(const_trie_range const & lhs, const_trie_range const & rhs) + { + return !(lhs == rhs); + } + }; + + /** The result type of insert() operations on a trie_map or trie_set. */ + template + struct trie_insert_result + { + using iterator = Iter; + + trie_insert_result() : iter(), inserted(false) {} + trie_insert_result(iterator it, bool ins) : iter(it), inserted(ins) {} + + iterator iter; + bool inserted; + + friend bool operator==( + trie_insert_result const & lhs, trie_insert_result const & rhs) + { + return lhs.iter == rhs.iter && lhs.inserted == rhs.inserted; + } + friend bool operator!=( + trie_insert_result const & lhs, trie_insert_result const & rhs) + { + return !(lhs == rhs); + } + }; + + namespace detail { + + struct index_within_parent_t + { + index_within_parent_t() : value_(-1) {} + + std::size_t value() const { return value_; } + + template< + typename Key, + typename Value, + typename Iter, + std::size_t KeySize> + void insert_at( + std::unique_ptr> const & child, + std::ptrdiff_t offset, + Iter it, + Iter end) + { + child->index_within_parent_.value_ = offset; + for (; it != end; ++it) { + ++(*it)->index_within_parent_.value_; + } + } + + template + void insert_ptr(std::unique_ptr> const & child) + { + child->index_within_parent_.value_ = 0; + } + + template + void erase(Iter it, Iter end) + { + for (; it != end; ++it) { + --(*it)->index_within_parent_.value_; + } + } + + private: + std::size_t value_; + }; + + template + struct trie_iterator_state_t + { + trie_node_t const * + parent_; + std::size_t index_; + }; + + template + trie_iterator_state_t + parent_state(trie_iterator_state_t state) + { + return {state.parent_->parent(), + state.parent_->index_within_parent()}; + } + + template + Key reconstruct_key(trie_iterator_state_t state) + { + Key retval; + while (state.parent_->parent()) { + retval.insert(retval.end(), state.parent_->key(state.index_)); + state = parent_state(state); + } + std::reverse(retval.begin(), retval.end()); + return retval; + } + + template + trie_node_t const * + to_node(trie_iterator_state_t state) + { + if (state.index_ < state.parent_->size()) + return state.parent_->child(state.index_); + return nullptr; + } + } + + /** An associative container similar to std::map, built upon a trie, or + prefix-tree. A trie_map contains a set of keys, each of which is a + sequence, and a set of values, each associated with some key. Each + node in the trie_map represents some prefix found in at least one + member of the set of values contained in the trie_map. If a certain + node represents the end of one of the keys, it has an associated + value. Such a node may or may not have children. + + Complexity of lookups is always O(M), where M is the size of the Key + being lookep up. Note that this implies that lookup complexity is + independent of the size of the trie_map. + + \param Key The key-type; must be a sequence of values comparable via + Compare()(x, y). + \param Value The value-type. + \param Compare The type of the comparison object used to compare + elements of the key-type. + */ + template + struct trie_map + { + private: + using node_t = + detail::trie_node_t; + using iter_state_t = detail::trie_iterator_state_t; + + public: + using key_type = Key; + using mapped_type = Value; + using value_type = trie_element; + using key_compare = Compare; + using key_element_type = typename Key::value_type; + + using reference = value_type &; + using const_reference = value_type const &; + using iterator = trie_map_iterator; + using const_iterator = const_trie_map_iterator; + using reverse_iterator = + reverse_trie_map_iterator; + using const_reverse_iterator = + const_reverse_trie_map_iterator; + using size_type = std::ptrdiff_t; + using difference_type = std::ptrdiff_t; + + using range = trie_range; + using const_range = const_trie_range; + using insert_result = trie_insert_result; + using match_result = trie_match_result; + + trie_map() : size_(0) {} + + trie_map(Compare const & comp) : size_(0), comp_(comp) {} + + template + trie_map(Iter first, Sentinel last, Compare const & comp = Compare()) : + size_(0), + comp_(comp) + { + insert(first, last); + } + template + explicit trie_map(Range r, Compare const & comp = Compare()) : + size_(0), + comp_(comp) + { + insert(detail::begin(r), detail::end(r)); + } + template + explicit trie_map( + parser::detail::text::trie const & + trie) : + size_(0) + { + Key key; + from_trie_impl(trie.header_, key); + } + trie_map(std::initializer_list il) : size_(0) + { + insert(il); + } + + trie_map & operator=(std::initializer_list il) + { + clear(); + for (auto const & x : il) { + insert(x); + } + return *this; + } + + bool empty() const { return size_ == 0; } + size_type size() const { return size_; } + size_type max_size() const { return PTRDIFF_MAX; } + + const_iterator begin() const + { + iter_state_t state{&header_, 0}; + if (size_) { + while (!state.parent_->min_value()) { + state.parent_ = state.parent_->min_child(); + } + } + return const_iterator(state); + } + const_iterator end() const + { + iter_state_t state{&header_, 0}; + if (size_) { + node_t const * node = nullptr; + while ((node = to_node(state))) { + state.parent_ = node; + state.index_ = state.parent_->size() - 1; + } + state.parent_ = state.parent_->parent(); + state.index_ = state.parent_->size(); + } + return const_iterator(state); + } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + const_reverse_iterator rbegin() const + { + return const_reverse_iterator{end()}; + } + const_reverse_iterator rend() const + { + return const_reverse_iterator{begin()}; + } + const_reverse_iterator crbegin() const { return rbegin(); } + const_reverse_iterator crend() const { return rend(); } + +#ifndef BOOST_PARSER_DOXYGEN + +#define BOOST_TRIE_MAP_C_STR_OVERLOAD(rtype, func) \ + template \ + rtype func(Char const(&chars)[N]) \ + { \ + static_assert( \ + std::is_same::value, \ + "Only well-formed when Char is Key::value_type."); \ + return func(detail::char_range{chars, chars + N - 1}); \ + } + +#define BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(rtype, func, quals) \ + template \ + rtype func(Char const(&chars)[N]) quals \ + { \ + static_assert( \ + std::is_same::value, \ + "Only well-formed when Char is Key::value_type."); \ + return func(detail::char_range{chars, chars + N - 1}); \ + } + +#endif + + /** Returns true if `key` is found in *this. */ + template + bool contains(KeyRange const & key) const + { + return find(key) != end(); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(bool, contains, const) +#endif + + /** Returns the iterator pointing to the key/value pair associated + with `key`, if `key` is found in *this. Returns end() + otherwise. */ + template + const_iterator find(KeyRange const & key) const + { + auto first = detail::begin(key); + auto const last = detail::end(key); + auto match = longest_match_impl(first, last); + if (first == last && match.match) { + return const_iterator(iter_state_t{ + to_node_ptr(match.node)->parent(), + to_node_ptr(match.node)->index_within_parent()}); + } + return this->end(); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(const_iterator, find, const) +#endif + + /** Returns the iterator pointing to the first key/value pair whose + key is not less than `key`. Returns end() if no such key can be + found. */ + template + const_iterator lower_bound(KeyRange const & key) const + { + return bound_impl(key); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(const_iterator, lower_bound, const) +#endif + + /** Returns the iterator pointing to the first key/value pair whose + key is not greater than `key`. Returns end() if no such key can + be found. */ + template + const_iterator upper_bound(KeyRange const & key) const + { + return bound_impl(key); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(const_iterator, upper_bound, const) +#endif + + /** Returns the `const_range(lower_bound(key), upper_bound(key))`.*/ + template + const_range equal_range(KeyRange const & key) const + { + return {lower_bound(key), upper_bound(key)}; + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(const_range, equal_range, const) +#endif + + /** Returns the longest subsequence of `[first, last)` found in *this, + whether or not it is a match. */ + template + match_result longest_subsequence(KeyIter first, Sentinel last) const + + { + return longest_match_impl(first, last); + } + + /** Returns the longest subsequence of `key` found in *this, whether + or not it is a match. */ + template + match_result longest_subsequence(KeyRange const & key) const + { + return longest_subsequence(detail::begin(key), detail::end(key)); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS( + match_result, longest_subsequence, const) +#endif + + /** Returns the longest matching subsequence of `[first, last)` found + in *this. */ + template + match_result longest_match(KeyIter first, Sentinel last) const + { + auto const retval = longest_match_impl(first, last); + return back_up_to_match(retval); + } + + /** Returns the longest matching subsequence of `key` found in + *this. */ + template + match_result longest_match(KeyRange const & key) const + { + return longest_match(detail::begin(key), detail::end(key)); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS(match_result, longest_match, const) +#endif + + /** Returns the result of extending `prev` by one element, `e`. */ + template + match_result extend_subsequence(match_result prev, KeyElementT e) const + { + auto e_ptr = &e; + return extend_subsequence_impl(prev, e_ptr, e_ptr + 1); + } + + /** Returns the result of extending `prev` by the longest subsequence + of `[first, last)` found in *this. */ + template + match_result extend_subsequence( + match_result prev, KeyIter first, Sentinel last) const + { + return extend_subsequence_impl(prev, first, last); + } + + /** Returns the result of extending `prev` by one element, `e`, if + that would form a match, and `prev` otherwise. `prev` must be a + match. */ + template + match_result extend_match(match_result prev, KeyElementT e) const + { + BOOST_PARSER_ASSERT(prev.match); + auto e_ptr = &e; + auto const retval = extend_subsequence_impl(prev, e_ptr, e_ptr + 1); + return back_up_to_match(retval); + } + + /** Returns the result of extending `prev` by the longest subsequence + of `[first, last)` found in *this, if that would form a match, and + `prev` otherwise. `prev` must be a match. */ + template + match_result + extend_match(match_result prev, KeyIter first, Sentinel last) const + { + BOOST_PARSER_ASSERT(prev.match); + auto const retval = extend_subsequence_impl(prev, first, last); + return back_up_to_match(retval); + } + + /** Writes the sequence of elements that would advance `prev` by one + element to `out`, and returns the final value of `out` after the + writes. */ + template + OutIter copy_next_key_elements(match_result prev, OutIter out) const + { + auto node = to_node_ptr(prev.node); + return std::copy(node->key_begin(), node->key_end(), out); + } + + /** Returns an optional reference to the const value associated with + `key` in *this (if any). */ + template + optional_ref operator[](KeyRange const & key) const + { + auto it = find(key); + if (it == end()) + return {}; + return it->value; + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD_QUALS( + optional_ref, operator[], const) +#endif + + /** Returns an optional reference to the const value associated with + `match` in *this (if any). */ + optional_ref operator[](match_result match) const + { + if (!match.match) + return {}; + return *to_node_ptr(match.node)->value(); + } + + iterator begin() { return iterator(const_this()->begin()); } + iterator end() { return iterator(const_this()->end()); } + + reverse_iterator rbegin() { return reverse_iterator{end()}; } + reverse_iterator rend() { return reverse_iterator{begin()}; } + + void clear() + { + header_ = node_t(); + size_ = 0; + } + + /** Returns the iterator pointing to the key/value pair associated + with `key`, if `key` is found in *this. Returns end() + otherwise. */ + template + iterator find(KeyRange const & key) + { + return iterator(const_this()->find(key)); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD(iterator, find) +#endif + + /** Returns the iterator pointing to the first key/value pair whose + key is not less than `key`. Returns end() if no such key can be + found. */ + template + iterator lower_bound(KeyRange const & key) + { + return iterator(const_this()->lower_bound(key)); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD(iterator, lower_bound) +#endif + + /** Returns the iterator pointing to the first key/value pair whose + key is not greater than `key`. Returns end() if no such key can + be found. */ + template + iterator upper_bound(KeyRange const & key) + { + return iterator(const_this()->upper_bound(key)); + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD(iterator, upper_bound) +#endif + + /** Returns the `const_range(lower_bound(key), upper_bound(key))`.*/ + template + range equal_range(KeyRange const & key) + { + return {lower_bound(key), upper_bound(key)}; + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD(range, equal_range) +#endif + + /** Returns an optional reference to the value associated with `key` + in *this (if any). */ + template + optional_ref operator[](KeyRange const & key) + { + auto it = find(key); + if (it == end()) + return {}; + return it->value; + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD( + optional_ref, operator[]) +#endif + + /** Returns an optional reference to the value associated with `match` + in *this (if any). */ + optional_ref operator[](match_result match) + { + if (!match.match) + return {}; + return *const_cast(to_node_ptr(match.node))->value(); + } + + /** Inserts the key/value pair `[first, last)`, `value` into *this. + The `inserted` field of the result will be true if the operation + resulted in a new insertion, or false otherwise. */ + template + insert_result insert(KeyIter first, Sentinel last, Value value) + { + if (empty()) { + std::unique_ptr new_node(new node_t(&header_)); + header_.insert(std::move(new_node)); + } + + auto match = longest_match_impl(first, last); + if (first == last && match.match) { + return {iterator(iter_state_t{ + to_node_ptr(match.node)->parent(), + to_node_ptr(match.node)->index_within_parent()}), + false}; + } + auto node = create_children( + const_cast(to_node_ptr(match.node)), first, last); + node->value() = std::move(value); + ++size_; + return {iterator(iter_state_t{node->parent(), 0}), true}; + } + + /** Inserts the key/value pair `key`, `value` into *this. The + `inserted` field of the result will be true if the operation + resulted in a new insertion, or false otherwise. */ + template + insert_result insert(KeyRange const & key, Value value) + { + return insert(detail::begin(key), detail::end(key), std::move(value)); + } + + template + insert_result insert(Char const (&chars)[N], Value value) + { + static_assert( + std::is_same::value, + "Only well-formed when Char is Key::value_type."); + return insert( + detail::char_range{chars, chars + N - 1}, + std::move(value)); + } + + /** Inserts the key/value pair `e` into *this. The `inserted` field + of the result will be true if the operation resulted in a new + insertion, or false otherwise. */ + insert_result insert(value_type e) + { + return insert( + detail::begin(e.key), detail::end(e.key), std::move(e.value)); + } + + /** Inserts the sequence of key/value pairs `[first, last)` into + *this. The `inserted` field of the result will be true if the + operation resulted in a new insertion, or false otherwise. */ + template + void insert(Iter first, Sentinel last) + { + for (; first != last; ++first) { + insert(first->key, first->value); + } + } + + /** Inserts the sequence of key/value pairs `r` into *this. The + `inserted` field of the result will be true if the operation + resulted in a new insertion, or false otherwise. */ + template + insert_result insert(Range const & r) + { + return insert(detail::begin(r), detail::end(r)); + } + + /** Inserts the sequence of key/value pairs `il` into this. The + `inserted` field of the result will be true if the operation + resulted in a new insertion, or false otherwise. */ + void insert(std::initializer_list il) + { + for (auto const & x : il) { + insert(x); + } + } + + /** Inserts the key/value pair `[first, last)`, `value` into *this, or + assigns `value` over the existing value associated with `[first, + last)`, if this key is already found in *this. The `inserted` + field of the result will be true if the operation resulted in a + new insertion, or false otherwise. */ + template + insert_result + insert_or_assign(KeyIter first, Sentinel last, Value value) + { + auto it = first; + auto match = longest_match_impl(it, last); + if (it == last && match.match) { + const_cast(*to_node_ptr(match.node)->value()) = + std::move(value); + return insert_result{ + iterator(iter_state_t{ + to_node_ptr(match.node)->parent(), + to_node_ptr(match.node)->index_within_parent()}), + false}; + } + return insert(first, last, std::move(value)); + } + + /** Inserts the key/value pair `key`, `value` into *this, or assigns + `value` over the existing value associated with `key`, if `key` is + already found in *this. The `inserted` field of the result will + be true if the operation resulted in a new insertion, or false + otherwise. */ + template + insert_result insert_or_assign(KeyRange const & key, Value value) + { + return insert_or_assign( + detail::begin(key), detail::end(key), std::move(value)); + } + + template + insert_result insert_or_assign(Char const (&chars)[N], Value value) + { + static_assert( + std::is_same::value, + "Only well-formed when Char is Key::value_type."); + return insert_or_assign( + detail::char_range{chars, chars + N - 1}, + std::move(value)); + } + + /** Erases the key/value pair associated with `key` from + *this. Returns true if the key is found in *this, false + otherwise. */ + template + bool erase(KeyRange const & key) + { + auto it = find(key); + if (it == end()) + return false; + erase(it); + return true; + } + +#ifndef BOOST_PARSER_DOXYGEN + BOOST_TRIE_MAP_C_STR_OVERLOAD(bool, erase) +#endif + + /** Erases the key/value pair pointed to by `it` from *this. Returns + an iterator to the next key/value pair in this. */ + iterator erase(iterator it) + { + auto state = it.it_.state_; + + --size_; + + auto node = + const_cast(state.parent_->child(state.index_)); + if (!node->empty()) { + // node has a value, but also children. Remove the value and + // return the next-iterator. + ++it; + node->value() = std::optional(); + return it; + } + + // node has a value, *and* no children. Remove it and all its + // singular predecessors. + const_cast(state.parent_)->erase(state.index_); + while (state.parent_->parent() && state.parent_->empty() && + !state.parent_->value()) { + state = parent_state(state); + const_cast(state.parent_)->erase(state.index_); + } + + if (state.parent_->parent()) + state = parent_state(state); + auto retval = iterator(state); + if (!empty()) + ++retval; + + return retval; + } + + /** Erases the sequence of key/value pairs pointed to by `[first, + last)` from *this. Returns an iterator to the next key/value pair + in *this. */ + iterator erase(iterator first, iterator last) + { + auto retval = last; + if (first == last) + return retval; + --last; + while (last != first) { + erase(last--); + } + erase(last); + return retval; + } + + void swap(trie_map & other) + { + header_.swap(other.header_); + std::swap(size_, other.size_); + std::swap(comp_, other.comp_); + } + + friend bool operator==(trie_map const & lhs, trie_map const & rhs) + { + return lhs.size() == rhs.size() && + std::equal(lhs.begin(), lhs.end(), rhs.begin()); + } + friend bool operator!=(trie_map const & lhs, trie_map const & rhs) + { + return !(lhs == rhs); + } + +#ifndef BOOST_PARSER_DOXYGEN + + private: + trie_map const * const_this() + { + return const_cast(this); + } + static node_t const * to_node_ptr(void const * ptr) + { + return static_cast(ptr); + } + + template + void from_trie_impl( + detail::trie_node_t< + detail::no_index_within_parent_t, + Key, + Value, + KeySize> const & node, + Key & key, + bool root = true) + { + // TODO: Use an iterative approach instead? + + if (!!node.value()) { + insert(key, *node.value()); + } + + std::vector next_elements; + node.copy_next_key_elements(std::back_inserter(next_elements)); + for (auto const & e : next_elements) { + auto const * n = node.child(e, comp_); + if (!root) + key.insert(key.end(), e); + from_trie_impl(*n, key, false); + if (!root) + key.erase(std::prev(key.end())); + } + } + + template + match_result longest_match_impl(KeyIter & first, Sentinel last) const + { + return extend_subsequence_impl( + match_result{&header_, 0, false, true}, first, last); + } + + template + match_result extend_subsequence_impl( + match_result prev, KeyIter & first, Sentinel last) const + { + if (to_node_ptr(prev.node) == &header_) { + if (header_.empty()) + return prev; + prev.node = header_.child(0); + } + + if (first == last) { + prev.match = !!to_node_ptr(prev.node)->value(); + prev.leaf = to_node_ptr(prev.node)->empty(); + return prev; + } + + node_t const * node = to_node_ptr(prev.node); + size_type size = prev.size; + while (first != last) { + auto const it = node->find(*first, comp_); + if (it == node->end()) + break; + ++first; + ++size; + node = it->get(); + } + + return match_result{node, size, !!node->value(), node->empty()}; + } + + static match_result back_up_to_match(match_result retval) + { + auto node = to_node_ptr(retval.node); + while (node->parent() && !node->value()) { + retval.node = node = node->parent(); + --retval.size; + } + if (!!node->value()) + retval.match = true; + return retval; + } + + template + node_t * create_children(node_t * node, KeyIter first, Sentinel last) + { + auto retval = node; + for (; first != last; ++first) { + std::unique_ptr new_node(new node_t(retval)); + retval = + retval->insert(*first, comp_, std::move(new_node))->get(); + } + return retval; + } + + template + const_iterator bound_impl(KeyRange const & key) const + { + auto first = detail::begin(key); + auto const last = detail::end(key); + auto match = longest_match_impl(first, last); + if (first == last && match.match) { + auto retval = const_iterator(iter_state_t{ + to_node_ptr(match.node)->parent(), + to_node_ptr(match.node)->index_within_parent()}); + if (!LowerBound) + ++retval; + return retval; + } + + auto node = to_node_ptr(match.node); + if (node->before_child_subtree(*first)) { + // If the next element of the key would be before this node's + // children, use the successor of this node; let + // const_iterator::operator++() figure out for us which node + // that is. + return ++const_iterator( + iter_state_t{node->parent(), node->index_within_parent()}); + } + + auto const it = node->lower_bound(*first, comp_); + if (it == node->end()) { + // Find the max value in this subtree, and go one past that. + do { + node = to_node_ptr(node->max_child()); + } while (!node->value()); + return ++const_iterator( + iter_state_t{node->parent(), node->parent()->size() - 1}); + } + + // Otherwise, find the min value within the child found above. + std::size_t parent_index = it - node->begin(); + node = to_node_ptr(it->get()); + while (!node->value()) { + node = to_node_ptr(node->min_child()); + parent_index = 0; + } + return const_iterator(iter_state_t{node->parent(), parent_index}); + } + + node_t header_; + size_type size_; + key_compare comp_; + +#endif + }; + + template + struct trie_set; + + template + struct const_trie_set_iterator; + + template + struct const_trie_map_iterator : stl_interfaces::iterator_interface< + const_trie_map_iterator, + std::bidirectional_iterator_tag, + trie_element, + trie_element> + { + private: + using state_t = detail::trie_iterator_state_t; + state_t state_; + using ref_type = trie_element; + using ptr_type = stl_interfaces::proxy_arrow_result; + + public: + const_trie_map_iterator() : state_{nullptr, 0} {} + + const_trie_map_iterator(trie_match_result match_result) + { + BOOST_PARSER_ASSERT(match_result.node); + BOOST_PARSER_ASSERT(match_result.match); + auto node = static_cast const *>(match_result.node); + state_.parent_ = node->parent(); + state_.index_ = node->index_within_parent(); + } + + ref_type operator*() const + { + return ref_type{detail::reconstruct_key(state_), + state_.parent_->child_value(state_.index_)}; + } + ptr_type operator->() const + { + ref_type && deref_result = **this; + return ptr_type( + ref_type{std::move(deref_result.key), deref_result.value}); + } + + const_trie_map_iterator & operator++() + { + auto node = to_node(state_); + if (node && !node->empty()) { + state_.parent_ = node; + state_.index_ = 0; + } else { + // Try the next sibling node. + ++state_.index_; + auto const first_state = state_; + while (state_.parent_->parent() && + state_.parent_->parent()->parent() && + state_.parent_->size() <= state_.index_) { + state_ = parent_state(state_); + ++state_.index_; + } + + // If we went all the way up, incrementing indices, and they + // were all at size() for each node, the first increment above + // must have taken us to the end; use that. + if ((!state_.parent_->parent() || + !state_.parent_->parent()->parent()) && + state_.parent_->size() <= state_.index_) { + state_ = first_state; + return *this; + } + } + + node = state_.parent_->child(state_.index_); + while (!node->value()) { + auto i = 0u; + node = node->child(i); + state_ = state_t{node->parent(), i}; + } + + return *this; + } + const_trie_map_iterator & operator--() + { + // Decrement-from-end case. + if (state_.index_ == state_.parent_->size()) { + --state_.index_; + return *this; + } + + // Back up one node at a time until we find an ancestor with a + // value or a previous sibling. + while (state_.parent_->parent() && state_.index_ == 0) { + state_ = parent_state(state_); + if (state_.parent_->child(state_.index_)->value()) + return *this; + } + + // If we get found no value above, go down the maximum subtree of + // the previous sibling. + if (state_.index_) + --state_.index_; + auto node = state_.parent_->child(state_.index_); + while (!node->empty()) { + auto i = node->size() - 1; + node = node->child(i); + state_ = state_t{node->parent(), i}; + } + + return *this; + } + + friend bool operator==( + const_trie_map_iterator lhs, const_trie_map_iterator rhs) + { + return lhs.state_.parent_ == rhs.state_.parent_ && + lhs.state_.index_ == rhs.state_.index_; + } + + using base_type = stl_interfaces::iterator_interface< + const_trie_map_iterator, + std::bidirectional_iterator_tag, + trie_element, + trie_element>; + using base_type::operator++; + using base_type::operator--; + +#ifndef BOOST_PARSER_DOXYGEN + + private: + explicit const_trie_map_iterator(state_t state) : state_(state) {} + + template + friend struct trie_map; + template + friend struct trie_set; + template + friend struct trie_map_iterator; + template + friend struct const_trie_set_iterator; + +#endif + }; + + template + struct trie_map_iterator : stl_interfaces::iterator_interface< + trie_map_iterator, + std::bidirectional_iterator_tag, + trie_element, + trie_element> + { + private: + const_trie_map_iterator it_; + using ref_type = trie_element; + using ptr_type = stl_interfaces::proxy_arrow_result; + + public: + trie_map_iterator() {} + + trie_map_iterator(trie_match_result match_result) : + trie_map_iterator(const_trie_map_iterator(match_result)) + {} + + ref_type operator*() const + { + return ref_type{detail::reconstruct_key(it_.state_), + it_.state_.parent_->child_value(it_.state_.index_)}; + }; + + ptr_type operator->() const + { + ref_type && deref_result = **this; + return ptr_type( + ref_type{std::move(deref_result.key), deref_result.value}); + } + + trie_map_iterator & operator++() + { + ++it_; + return *this; + } + trie_map_iterator & operator--() + { + --it_; + return *this; + } + + friend bool + operator==(trie_map_iterator lhs, trie_map_iterator rhs) + { + return lhs.it_ == rhs.it_; + } + + using base_type = stl_interfaces::iterator_interface< + trie_map_iterator, + std::bidirectional_iterator_tag, + trie_element, + trie_element>; + using base_type::operator++; + using base_type::operator--; + +#ifndef BOOST_PARSER_DOXYGEN + + private: + explicit trie_map_iterator( + detail::trie_iterator_state_t state) : + it_(state) + {} + explicit trie_map_iterator(const_trie_map_iterator it) : + it_(it) + {} + + template + friend struct trie_map; + template + friend struct trie_set; + +#endif + }; + +}} + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + +namespace std::ranges { + template + inline constexpr bool + enable_borrowed_range> = + true; + template + inline constexpr bool enable_borrowed_range< + boost::parser::detail::text::const_trie_range> = true; +} + +#endif + +#endif diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index b3402822..980525fe 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -931,26 +931,6 @@ namespace boost { namespace parser { return *context.callbacks_; } - template - decltype(auto) get_trie( - Context const & context, symbol_parser const & symbol_parser) - { - using trie_t = text::trie, T>; - symbol_table_tries_t & symbol_table_tries = - *context.symbol_table_tries_; - std::any & a = symbol_table_tries[(void *)&symbol_parser]; - if (!a.has_value()) { - a = trie_t{}; - trie_t & trie = *std::any_cast(&a); - for (auto const & e : symbol_parser.initial_elements()) { - trie.insert(e.first | text::as_utf32, e.second); - } - return trie; - } else { - return *std::any_cast(&a); - } - } - // Type traits. @@ -1512,10 +1492,9 @@ namespace boost { namespace parser { using case_fold_array_t = std::array; template - struct no_case_iter : stl_interfaces::iterator_interface< + struct no_case_iter : stl_interfaces::proxy_iterator_interface< no_case_iter, std::forward_iterator_tag, - char32_t, char32_t> { no_case_iter() : it_(), last_(), idx_(0), last_idx_() {} @@ -1545,10 +1524,9 @@ namespace boost { namespace parser { return lhs.it_ == rhs.it_ && lhs.idx_ == rhs.idx_; } - using base_type = stl_interfaces::iterator_interface< + using base_type = stl_interfaces::proxy_iterator_interface< no_case_iter, std::forward_iterator_tag, - char32_t, char32_t>; using base_type::operator++; @@ -1573,6 +1551,66 @@ namespace boost { namespace parser { int last_idx_; }; + template + struct case_fold_view + { + using iterator = + no_case_iter, detail::sentinel_t>; + + case_fold_view(V base) : base_(std::move(base)) {} + + iterator begin() const + { + return iterator( + text::detail::begin(base_), text::detail::end(base_)); + } + auto end() const { return text::detail::end(base_); } + + private: + V base_; + }; + + template + auto get_trie( + Context const & context, symbol_parser const & symbol_parser) + { + using trie_t = text::trie_map, T>; + using result_type = std::pair; + symbol_table_tries_t & symbol_table_tries = + *context.symbol_table_tries_; + + auto & [any, has_case_folded] = + symbol_table_tries[(void *)&symbol_parser]; + + bool const needs_case_folded = context.no_case_depth_; + + if (!any.has_value()) { + any = trie_t{}; + has_case_folded = false; + trie_t & trie = *std::any_cast(&any); + for (auto const & e : symbol_parser.initial_elements()) { + trie.insert(e.first | text::as_utf32, e.second); + if (needs_case_folded) { + trie.insert( + case_fold_view(e.first | text::as_utf32), e.second); + has_case_folded = true; + } + } + return result_type(trie, has_case_folded); + } else { + trie_t & trie = *std::any_cast(&any); + if (needs_case_folded && !has_case_folded) { + trie_t new_trie = trie; + for (auto && [key, value] : trie) { + new_trie.insert( + case_fold_view(key | text::as_utf32), value); + } + std::swap(new_trie, trie); + } + return result_type(trie, has_case_folded); + } + } + template<> struct char_subranges { @@ -4538,9 +4576,13 @@ namespace boost { namespace parser { parser::detail::text::optional_ref find(Context const & context, std::string_view str) const { - parser::detail::text::trie, T> & trie_ = - detail::get_trie(context, ref()); - return trie_[str | detail::text::as_utf32]; + auto [trie, has_case_folded] = detail::get_trie(context, ref()); + if (context.no_case_depth_) { + return trie[detail::case_fold_view( + str | detail::text::as_utf32)]; + } else { + return trie[str | detail::text::as_utf32]; + } } /** Inserts an entry consisting of a UTF-8 string `str` to match, and @@ -4549,9 +4591,14 @@ namespace boost { namespace parser { template void insert(Context const & context, std::string_view str, T && x) const { - parser::detail::text::trie, T> & trie_ = - detail::get_trie(context, ref()); - trie_.insert(str | detail::text::as_utf32, std::move(x)); + auto [trie, has_case_folded] = detail::get_trie(context, ref()); + if (context.no_case_depth_) { + trie.insert( + detail::case_fold_view(str | detail::text::as_utf32), + std::move(x)); + } else { + trie.insert(str | detail::text::as_utf32, std::move(x)); + } } /** Erases the entry whose UTF-8 match string is `str` from the copy @@ -4559,9 +4606,13 @@ namespace boost { namespace parser { template void erase(Context const & context, std::string_view str) const { - parser::detail::text::trie, T> & trie_ = - detail::get_trie(context, ref()); - trie_.erase(str | detail::text::as_utf32); + auto [trie, has_case_folded] = detail::get_trie(context, ref()); + if (context.no_case_depth_) { + trie.erase( + detail::case_fold_view(str | detail::text::as_utf32)); + } else { + trie.erase(str | detail::text::as_utf32); + } } template< @@ -4600,12 +4651,14 @@ namespace boost { namespace parser { [[maybe_unused]] auto _ = detail::scoped_trace( *this, first, last, context, flags, retval); - parser::detail::text::trie, T> const & trie_ = - detail::get_trie(context, ref()); - auto const lookup = trie_.longest_match(first, last); + auto [trie, _0] = detail::get_trie(context, ref()); + auto const lookup = context.no_case_depth_ + ? trie.longest_match(detail::case_fold_view( + BOOST_PARSER_SUBRANGE(first, last))) + : trie.longest_match(first, last); if (lookup.match) { std::advance(first, lookup.size); - detail::assign(retval, T{*trie_[lookup]}); + detail::assign(retval, T{*trie[lookup]}); } else { success = false; } diff --git a/include/boost/parser/parser_fwd.hpp b/include/boost/parser/parser_fwd.hpp index fcfda326..9500ac46 100644 --- a/include/boost/parser/parser_fwd.hpp +++ b/include/boost/parser/parser_fwd.hpp @@ -72,7 +72,7 @@ namespace boost { namespace parser { }; using symbol_table_tries_t = - std::map>; + std::map, std::less>; template< bool DoTrace, diff --git a/test/no_case.cpp b/test/no_case.cpp index 5e454426..6a59a4a9 100644 --- a/test/no_case.cpp +++ b/test/no_case.cpp @@ -168,6 +168,97 @@ TEST(no_case, match_any_within_string) } } +TEST(no_case, symbol_table) +{ + // without mutation + { + symbols const roman_numerals = { + {"I", 1}, {"V", 5}, {"X", 10}, {"L", 50}, {"C", 100}}; + symbols const named_strings = { + {"I", "1"}, {"V", "5"}, {"X", "10"}, {"L", "50"}, {"C", "100"}}; + + { + auto const result = parse("I", no_case[roman_numerals]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 1); + } + { + auto const result = parse("i", no_case[roman_numerals]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 1); + } + { + auto const result = parse("I", no_case[named_strings]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "1"); + } + { + auto const result = parse("i", no_case[named_strings]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "1"); + } + + { + auto const result = parse("L", no_case[roman_numerals]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 50); + } + { + auto const result = parse("l", no_case[roman_numerals]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 50); + } + { + auto const result = parse("L", no_case[named_strings]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "50"); + } + { + auto const result = parse("l", no_case[named_strings]); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "50"); + } + } + // with mutation + { + symbols roman_numerals; + roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10); + auto const add_numeral = [&roman_numerals](auto & context) { + using namespace boost::parser::literals; + char chars[2] = {get(_attr(context), 0_c), 0}; + roman_numerals.insert(context, chars, get(_attr(context), 1_c)); + }; + auto const numerals_parser = omit[roman_numerals] >> + (char_ >> int_)[add_numeral] >> + no_case[roman_numerals]; + + { + auto const result = parse("VL50L", numerals_parser); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 50); + EXPECT_FALSE(parse("L", roman_numerals)); + } + { + auto const result = parse("VL50l", numerals_parser); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 50); + EXPECT_FALSE(parse("L", roman_numerals)); + } + { + auto const result = parse("VC100C", numerals_parser); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 100); + EXPECT_FALSE(parse("C", roman_numerals)); + } + { + auto const result = parse("Vc100C", numerals_parser); + EXPECT_TRUE(result); + EXPECT_EQ(*result, 100); + EXPECT_FALSE(parse("C", roman_numerals)); + } + } +} + constexpr auto capital_sharp_s = u8"ẞ"; // U+1E9E constexpr auto small_sharp_s = u8"ß"; // U+00DF constexpr auto double_s = u8"sS"; // U+0073 U+0073