From d0208fb12c7459a0b4ad2d4686f349f48674e022 Mon Sep 17 00:00:00 2001 From: Zach Laine Date: Thu, 25 Jan 2024 19:16:07 -0600 Subject: [PATCH] Add transform_replace range adaptor and transform_replace_view. --- doc/parser.qbk | 3 + doc/tutorial.qbk | 69 +- .../parser/detail/text/transcode_view.hpp | 4 +- include/boost/parser/parser.hpp | 23 +- include/boost/parser/replace.hpp | 6 +- include/boost/parser/search.hpp | 7 +- include/boost/parser/split.hpp | 4 +- include/boost/parser/transform_replace.hpp | 835 ++++++++++++++++++ test/CMakeLists.txt | 1 + test/parser.cpp | 93 ++ test/replace.cpp | 6 +- test/split.cpp | 2 +- test/transform_replace.cpp | 835 ++++++++++++++++++ 13 files changed, 1859 insertions(+), 29 deletions(-) create mode 100644 include/boost/parser/transform_replace.hpp create mode 100644 test/transform_replace.cpp diff --git a/doc/parser.qbk b/doc/parser.qbk index 89184cef..e0c1e6e3 100644 --- a/doc/parser.qbk +++ b/doc/parser.qbk @@ -89,6 +89,9 @@ [def _replace_ [globalref boost::parser::replace `boost::parser::replace`]] [def _replace_v_ [classref boost::parser::replace_view `boost::parser::replace_view`]] [def _replace_vs_ [classref boost::parser::replace_view `boost::parser::replace_view`s]] +[def _trans_replace_ [globalref boost::parser::transform_replace `boost::parser::transform_replace`]] +[def _trans_replace_v_ [classref boost::parser::transform_replace_view `boost::parser::transform_replace_view`]] +[def _trans_replace_vs_ [classref boost::parser::transform_replace_view `boost::parser::transform_replace_view`s]] [def _std_str_ `std::string`] diff --git a/doc/tutorial.qbk b/doc/tutorial.qbk index 38eeef35..01b30459 100644 --- a/doc/tutorial.qbk +++ b/doc/tutorial.qbk @@ -3122,10 +3122,10 @@ range. } assert(replace_result == "fooaafoobaabafoofoo"); -Note that we could not have written `std::string replace_result(r.begin(), +Note that we could *not* have written `std::string replace_result(r.begin(), r.end())`. 
This is ill-formed because the `std::string` range constructor -takes two iterators, but `decltype(rng.end())` is a sentinel type different -from `decltype(rng.begin())`. +takes two iterators of the same type, but `decltype(rng.end())` is a sentinel +type different from `decltype(rng.begin())`. Though the ranges `r` and `replacement` can both be C-style strings, _replace_v_ must know the end of `replacement` before it does any work. This @@ -3145,12 +3145,11 @@ sequences. So calls like this won't work: char const str[] = "some text"; char const replacement_str[] = "some text"; using namespace bp = boost::parser; - auto r = empty_str | bp::replace(parser, replacement_str | bp::as_utf8); + auto r = empty_str | bp::replace(parser, replacement_str | bp::as_utf8); // Error: ill-formed! Can't mix plain-char inputs and UTF replacements. -Notice that this does not work, even though `char` and UTF-8 are the same -size. If `r` and `replacement` are both sequences of `char`, everything will -work of course. It's just mixing `char` and UTF-encoded sequences that does -not work. +This does not work, even though `char` and UTF-8 are the same size. If `r` +and `replacement` are both sequences of `char`, everything will work of +course. It's just mixing `char` and UTF-encoded sequences that does not work. All the details called out in the subsection on _search_ above apply to _replace_: its parser produces no attributes; it accepts C-style strings for @@ -3162,6 +3161,60 @@ _replace_ can be called with, and _replace_v_ can be constructed with, a skip parser or not, and you can always pass _trace_ at the end of any of their overloads. +[heading _trans_replace_] + +[important _trans_replace_ and _trans_replace_v_ are not available on MSVC in +C++17 mode.] + +_trans_replace_ creates _trans_replace_vs_. _trans_replace_v_ is a +`std::views`-style view. It produces a range of subranges from the parsed +range `r` and the given invocable `f`. 
Wherever in the parsed range a match +to the given parser `parser` is found, let `parser`'s attribute be `attr`; +`f(std::move(attr))` is the subrange produced. Each subrange of `r` that does +not match `parser` is produced as a subrange as well. The subranges are +produced in the order in which they occur in `r`. Unlike _split_v_, +_trans_replace_v_ does not produce empty subranges, unless +`f(std::move(attr))` is empty. Here is an example. + + auto string_sum = [](std::vector<int> const & ints) { + return std::to_string(std::accumulate(ints.begin(), ints.end(), 0)); + }; + + auto rng = "There are groups of [1, 2, 3, 4, 5] in the set." | + bp::transform_replace('[' >> bp::int_ % ',' >> ']', bp::ws, string_sum); + int count = 0; + // Prints "There are groups of 15 in the set." + for (auto subrange : rng) { + for (auto ch : subrange) { + std::cout << ch; + } + ++count; + } + std::cout << "\n"; + assert(count == 3); + +Let the type `decltype(f(std::move(attr)))` be `Replacement`. `Replacement` +must be a range, and must be compatible with `r`. See the description of +_replace_v_'s iterator compatibility requirements in the section above for +details. + +Just like _replace_ and _replace_v_, _trans_replace_ and _trans_replace_v_ do +silent transcoding of the result to the appropriate UTF, if applicable. If +both `r` and `f(std::move(attr))` are ranges of `char`, or are both the same +UTF, no transcoding occurs. If one of `r` and `f(std::move(attr))` is a range +of `char` and the other is some UTF, the program is ill-formed. + +_trans_replace_v_ will move each attribute into `f`; `f` may move from the +argument or copy it as desired. `f` may return an lvalue reference. If it +does so, the address of the reference will be taken and stored within +_trans_replace_v_. Otherwise, the value returned by `f` is moved into +_trans_replace_v_. In either case, the value type of _trans_replace_v_ is +always a subrange. 
+ +_trans_replace_ can be called with, and _trans_replace_v_ can be constructed +with, a skip parser or not, and you can always pass _trace_ at the end of any +of their overloads. + [endsect] [section Unicode Support] diff --git a/include/boost/parser/detail/text/transcode_view.hpp b/include/boost/parser/detail/text/transcode_view.hpp index 04a20c4f..92f439a8 100644 --- a/include/boost/parser/detail/text/transcode_view.hpp +++ b/include/boost/parser/detail/text/transcode_view.hpp @@ -518,11 +518,9 @@ namespace boost::parser::detail { namespace text { } public: - constexpr utf_view() #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - requires std::default_initializable + constexpr utf_view() requires std::default_initializable = default; #endif - = default; constexpr utf_view(V base) : base_{std::move(base)} {} constexpr V base() const & diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index da3b18d7..b8bc9d68 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -1807,8 +1807,8 @@ namespace boost { namespace parser { } template - constexpr void move_back( - Container & c, std::optional & x, bool gen_attrs) + constexpr void + move_back(Container & c, std::optional && x, bool gen_attrs) { if (!gen_attrs || !x) return; @@ -1824,7 +1824,10 @@ namespace boost { namespace parser { detail::move_back_impl(c, std::move(*x)); } - template + template< + typename Container, + typename T, + typename Enable = std::enable_if_t>> constexpr void move_back(Container & c, std::optional && x, bool gen_attrs) { @@ -2892,6 +2895,7 @@ namespace boost { namespace parser { success); if (!success) { success = true; + first = prev_first; break; } } @@ -3998,11 +4002,18 @@ namespace boost { namespace parser { } return; } - if constexpr (out_container) { + using just_x = attr_t; + using just_out = detail::remove_cv_ref_t; + if constexpr ( + (!out_container || + !std::is_same_v) && + std::is_assignable_v && + (!std::is_same_v || + 
!std::is_integral_v)) { + detail::assign(out, std::move(x)); + } else { detail::move_back( out, std::move(x), detail::gen_attrs(flags)); - } else { - detail::assign(out, std::move(x)); } } }; diff --git a/include/boost/parser/replace.hpp b/include/boost/parser/replace.hpp index 4ccca6f5..2db07a89 100644 --- a/include/boost/parser/replace.hpp +++ b/include/boost/parser/replace.hpp @@ -8,8 +8,6 @@ namespace boost::parser { - // TODO: transform_replace. - namespace detail { template>> constexpr auto range_value_type = @@ -521,7 +519,7 @@ namespace boost::parser { std::ranges::viewable_range) && // clang-format on can_replace_view< - decltype(to_range::call(std::declval())), + to_range_t, decltype(to_range< ReplacementR, true, @@ -567,7 +565,7 @@ namespace boost::parser { std::ranges::viewable_range) && // clang-format on can_replace_view< - decltype(to_range::call(std::declval())), + to_range_t, decltype(to_range< ReplacementR, true, diff --git a/include/boost/parser/search.hpp b/include/boost/parser/search.hpp index 4e9e510e..3df2a567 100644 --- a/include/boost/parser/search.hpp +++ b/include/boost/parser/search.hpp @@ -82,6 +82,9 @@ namespace boost::parser { } }; + template + using to_range_t = decltype(to_range::call(std::declval())); + struct phony {}; @@ -533,7 +536,7 @@ namespace boost::parser { std::is_pointer_v> || std::ranges::viewable_range) && can_search_all_view< - decltype(to_range::call(std::declval())), + to_range_t, Parser, GlobalState, ErrorHandler, @@ -560,7 +563,7 @@ namespace boost::parser { std::is_pointer_v> || std::ranges::viewable_range) && can_search_all_view< - decltype(to_range::call(std::declval())), + to_range_t, Parser, GlobalState, ErrorHandler, diff --git a/include/boost/parser/split.hpp b/include/boost/parser/split.hpp index 92d17103..31a5ce72 100644 --- a/include/boost/parser/split.hpp +++ b/include/boost/parser/split.hpp @@ -262,7 +262,7 @@ namespace boost::parser { std::is_pointer_v> || std::ranges::viewable_range) && 
can_split_view< - decltype(to_range::call(std::declval())), + to_range_t, Parser, GlobalState, ErrorHandler, @@ -289,7 +289,7 @@ namespace boost::parser { std::is_pointer_v> || std::ranges::viewable_range) && can_split_view< - decltype(to_range::call(std::declval())), + to_range_t, Parser, GlobalState, ErrorHandler, diff --git a/include/boost/parser/transform_replace.hpp b/include/boost/parser/transform_replace.hpp new file mode 100644 index 00000000..e4b6b616 --- /dev/null +++ b/include/boost/parser/transform_replace.hpp @@ -0,0 +1,835 @@ +#ifndef BOOST_PARSER_TRANSFORM_REPLACE_HPP +#define BOOST_PARSER_TRANSFORM_REPLACE_HPP + +#include + +#if !defined(_MSC_VER) || BOOST_PARSER_USE_CONCEPTS + + +namespace boost::parser { + + namespace detail { + + template + using attr_type = decltype(std::declval().call( + std::bool_constant{}, + std::declval(), + std::declval(), + std::declval>(), + ws, + detail::default_flags(), + std::declval())); + template + using range_attr_t = attr_type, sentinel_t, Parser>; + +#if BOOST_PARSER_USE_CONCEPTS + // clang-format off + template + concept transform_replacement_for = + std::regular_invocable> && + detail::replacement_for< + std::invoke_result_t>, V> && + (detail::range_utf_format_v == + detail::range_utf_format_v< + std::invoke_result_t>>); + // clang-format on +#else + template + using transform_replacement_for_expr = decltype(std::declval()( + std::declval>())); + template< + typename F, + typename V, + typename Parser, + bool = is_detected_v> + constexpr bool transform_replacement_for = false; + template + constexpr bool transform_replacement_for = + replacement_for, V> && + (detail::range_utf_format_v == + detail::range_utf_format_v< + transform_replacement_for_expr>); +#endif + + template< + typename R, + typename Result, + text::format OtherFormat = range_utf_format_v>, + text::format Format = range_utf_format_v>> + struct utf_wrap + { + template + static auto call(R_ && r) + { + return (R_ &&) r | as_utf; + } + }; + 
+ template + struct utf_wrap + { + template + static R_ && call(R_ && r) + { + return (R_ &&) r; + } + }; + template + struct utf_wrap + { + template + static R_ && call(R_ && r) + { + return (R_ &&) r; + } + }; + template + struct utf_wrap + { + // Looks like you tried to use transform_replace() to replace + // subranges of chars with subranges of some UTF-N (for N=8, 16, + // or 32). Transcoding from char (unknown encoding) is not + // supported. Check the return type of your transform function. + }; + template + struct utf_wrap + { + // Looks like you tried to use transform_replace() to replace + // subranges of some UTF-N (for N=8, 16, or 32) with subranges of + // chars. Transcoding to char (unknown encoding) is not supported. + // Check the return type of your transform function. + }; + + template + struct regular_ref_wrapper + { + regular_ref_wrapper() = default; + regular_ref_wrapper(T & ref) : ptr_(&ref) {} + + T & get() const { return *ptr_; } + + T * ptr_; + }; + + // This type catches results of calling F, to accommodate when F + // returns an rvalue or a type that needs to be transcoded to a + // different UTF. + template + struct utf_rvalue_shim + { + using result_type = std::invoke_result_t; + using maybe_wrapped_result_type = + decltype(utf_wrap::call( + std::declval())); + static constexpr bool final_type_is_reference = + std::is_lvalue_reference_v; + using final_type = std::conditional_t< + final_type_is_reference, + regular_ref_wrapper< + std::remove_reference_t>, + remove_cv_ref_t>; + + template + utf_rvalue_shim(F_ && f) : f_((F_ &&) f) + {} + + // These two only have return values for testing and metaprogramming + // purposes. 
+ template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + decltype(auto) operator()(Attr && attr) const + { + result_ = final_type( + utf_wrap::call((*f_)((Attr &&) attr))); + return result_->get(); + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + decltype(auto) operator()(Attr && attr) + { + result_ = final_type( + utf_wrap::call((*f_)((Attr &&) attr))); + return result_->get(); + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + final_type & operator()(Attr && attr) const + { + result_ = utf_wrap::call((*f_)((Attr &&) attr)); + return *result_; + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + final_type & operator()(Attr && attr) + { + result_ = utf_wrap::call((*f_)((Attr &&) attr)); + return *result_; + } + + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + decltype(auto) get() const + { + return result_->get(); + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + decltype(auto) get() + { + return result_->get(); + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + final_type & get() const + { + return *result_; + } + template< + bool B = final_type_is_reference, + typename Enable = std::enable_if_t> + final_type & get() + { + return *result_; + } + + std::optional f_; + mutable std::optional result_; + }; + + template< + typename R, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + auto attr_search_impl( + R && r, + parser_interface const & parser, + parser_interface const & skip, + trace trace_mode) + { + auto first = text::detail::begin(r); + auto const last = text::detail::end(r); + + auto match_first = first; + auto match_last = first; + auto before = [&match_first](auto & ctx) { + match_first = _where(ctx).begin(); + }; + auto after = 
[&match_last](auto & ctx) { + match_last = _where(ctx).begin(); + }; + + auto const search_parser = + omit[*(char_ - parser)] >> + -lexeme[eps[before] >> parser::skip[parser] >> eps[after]]; + + using parse_result_outer = decltype(parser::prefix_parse( + first, last, search_parser, trace_mode)); + + static_assert( + !std::is_same_v, + "If you're seeing this error, you passed a parser to " + "transform_replace() that has no attribute. Please fix."); + + using parse_result = + remove_cv_ref_t())>; + + using return_tuple = tuple< + decltype(BOOST_PARSER_SUBRANGE(first, first)), + parse_result>; + + if (first == last) { + return return_tuple( + BOOST_PARSER_SUBRANGE(first, first), parse_result{}); + } + + if constexpr (std::is_same_v>) { + auto result = parser::prefix_parse( + first, last, search_parser, trace_mode); + if (*result) { + return return_tuple( + BOOST_PARSER_SUBRANGE(match_first, match_last), + std::move(**result)); + } + } else { + auto result = parser::prefix_parse( + first, last, search_parser, skip, trace_mode); + if (*result) { + return return_tuple( + BOOST_PARSER_SUBRANGE(match_first, match_last), + std::move(**result)); + } + } + + return return_tuple( + BOOST_PARSER_SUBRANGE(first, first), parse_result{}); + } + + template< + typename R, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + auto attr_search_repack_shim( + R && r, + parser_interface const & parser, + parser_interface const & skip, + trace trace_mode) + { + using value_type = range_value_t; + if constexpr (std::is_same_v) { + return detail::attr_search_impl( + (R &&) r, parser, skip, trace_mode); + } else { + auto r_unpacked = detail::text::unpack_iterator_and_sentinel( + text::detail::begin(r), text::detail::end(r)); + auto result = detail::attr_search_impl( + r | as_utf32, parser, skip, trace_mode); + auto subrng = parser::get(result, llong<0>{}); + auto & attr = parser::get(result, llong<1>{}); + return tuple< + 
decltype(BOOST_PARSER_SUBRANGE( + r_unpacked.repack(subrng.begin().base()), + r_unpacked.repack(subrng.end().base()))), + remove_cv_ref_t>( + BOOST_PARSER_SUBRANGE( + r_unpacked.repack(subrng.begin().base()), + r_unpacked.repack(subrng.end().base())), + std::move(attr)); + } + } + } + + /** Produces a range of subranges of a given range `base`. Each subrange + is either a subrange of `base` that does not match the given parser + `parser`, or is `f(*boost::parser::parse(match, parser))`, where `f` + is the given invocable and `match` is the matching subrange. */ + template< +#if BOOST_PARSER_USE_CONCEPTS + std::ranges::viewable_range V, + std::move_constructible F, +#else + typename V, + typename F, +#endif + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser +#if !BOOST_PARSER_USE_CONCEPTS + , + typename Enable = + std::enable_if_t> +#endif + > +#if BOOST_PARSER_USE_CONCEPTS + requires detail::transform_replacement_for +#endif + struct transform_replace_view + : detail::stl_interfaces::view_interface> + { + //private: + using attr_t = detail::range_attr_t; + using replacement_range = std::invoke_result_t; + + public: + constexpr transform_replace_view() = default; + constexpr transform_replace_view( + V base, + parser_interface const & parser, + parser_interface const & skip, + F f, + trace trace_mode = trace::off) : + base_(std::move(base)), + f_(std::move(f)), + parser_(parser), + skip_(skip), + trace_mode_(trace_mode) + {} + constexpr transform_replace_view( + V base, + parser_interface const & parser, + F f, + trace trace_mode = trace::off) : + base_(std::move(base)), + f_(std::move(f)), + parser_(parser), + skip_(), + trace_mode_(trace_mode) + {} + + constexpr V base() const & +#if BOOST_PARSER_USE_CONCEPTS + requires std::copy_constructible +#endif + { + return base_; + } + constexpr V base() && { return std::move(base_); } + + constexpr F const & f() const { return *f_.f_; } + + constexpr auto begin() { return 
iterator{this}; } + constexpr auto end() { return sentinel{}; } + + constexpr auto begin() const +#if BOOST_PARSER_USE_CONCEPTS + requires std::ranges::range +#endif + { + return iterator{this}; + } + constexpr auto end() const +#if BOOST_PARSER_USE_CONCEPTS + requires std::ranges::range +#endif + { + return sentinel{}; + } + + template + struct sentinel + {}; + + template + struct iterator + : detail::stl_interfaces::proxy_iterator_interface< + iterator, + std::forward_iterator_tag, + BOOST_PARSER_SUBRANGE, + detail::maybe_const>>> + { + using I = detail::iterator_t>; + using S = detail::sentinel_t>; + + using ref_t_iter = detail::either_iterator< + detail::maybe_const, + detail::maybe_const>; + using reference_type = BOOST_PARSER_SUBRANGE; + + constexpr iterator() = default; + constexpr iterator( + detail::maybe_const * parent) : + parent_(parent), + r_(parent_->base_.begin(), parent_->base_.end()), + curr_(r_.begin(), r_.begin()), + next_it_(r_.begin()), + in_match_(true) + { + ++*this; + } + + constexpr iterator & operator++() + { + if (in_match_) { + r_ = BOOST_PARSER_SUBRANGE(next_it_, r_.end()); + auto new_match_and_attr = detail::attr_search_repack_shim( + r_, + parent_->parser_, + parent_->skip_, + parent_->trace_mode_); + auto const new_match = + parser::get(new_match_and_attr, llong<0>{}); + parent_->f_( + parser::get(std::move(new_match_and_attr), llong<1>{})); + if (new_match.begin() == curr_.end()) { + curr_ = new_match; + } else { + curr_ = + BOOST_PARSER_SUBRANGE(next_it_, new_match.begin()); + in_match_ = false; + } + next_it_ = new_match.end(); + } else { + if (!curr_.empty()) { + curr_ = BOOST_PARSER_SUBRANGE(curr_.end(), next_it_); + in_match_ = true; + } + if (curr_.empty()) + r_ = BOOST_PARSER_SUBRANGE(next_it_, r_.end()); + } + return *this; + } + + constexpr reference_type operator*() const + { + if (in_match_) { + return reference_type( + ref_t_iter(parent_->f_.get().begin()), + ref_t_iter(parent_->f_.get().end())); + } else { + return 
reference_type( + ref_t_iter(curr_.begin()), ref_t_iter(curr_.end())); + } + } + + friend constexpr bool operator==(iterator lhs, iterator rhs) + { + return lhs.r_.begin() == rhs.r_.begin(); + } + friend constexpr bool operator==(iterator it, sentinel) + { + return it.r_.begin() == it.r_.end(); + } + + using base_type = detail::stl_interfaces::proxy_iterator_interface< + iterator, + std::forward_iterator_tag, + reference_type>; + using base_type::operator++; + + private: + detail::maybe_const * parent_; + BOOST_PARSER_SUBRANGE r_; + BOOST_PARSER_SUBRANGE curr_; + I next_it_; + bool in_match_; + }; + + template + friend struct iterator; + + private: + V base_; + F f_; + parser_interface parser_; + parser_interface skip_; + trace trace_mode_; + }; + + // deduction guides + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + transform_replace_view( + V &&, + parser_interface, + parser_interface, + F &&, + trace) + -> transform_replace_view< + detail::text::detail::all_t, + detail::remove_cv_ref_t, + Parser, + GlobalState, + ErrorHandler, + SkipParser>; + + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + transform_replace_view( + V &&, + parser_interface, + parser_interface, + F &&) + -> transform_replace_view< + detail::text::detail::all_t, + detail::remove_cv_ref_t, + Parser, + GlobalState, + ErrorHandler, + SkipParser>; + + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler> + transform_replace_view( + V &&, parser_interface, F &&, trace) + -> transform_replace_view< + detail::text::detail::all_t, + detail::remove_cv_ref_t, + Parser, + GlobalState, + ErrorHandler, + parser_interface>>; + + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler> + transform_replace_view( + V &&, parser_interface, F &&) + 
-> transform_replace_view< + detail::text::detail::all_t, + detail::remove_cv_ref_t, + Parser, + GlobalState, + ErrorHandler, + parser_interface>>; + + namespace detail { + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + using transform_replace_view_expr = decltype(transform_replace_view< + V, + F, + Parser, + GlobalState, + ErrorHandler, + SkipParser>( + std::declval(), + std::declval< + parser_interface const &>(), + std::declval const &>(), + std::declval(), + trace::on)); + + template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + constexpr bool can_transform_replace_view = is_detected_v< + transform_replace_view_expr, + V, + F, + Parser, + GlobalState, + ErrorHandler, + SkipParser>; + + struct transform_replace_impl + { +#if BOOST_PARSER_USE_CONCEPTS + + template< + parsable_range_like R, + std::move_constructible F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + requires + // clang-format off + (std::is_pointer_v> || + std::ranges::viewable_range) && + std::regular_invocable< + F &, + range_attr_t, Parser>> && + // clang-format on + can_transform_replace_view< + to_range_t, + utf_rvalue_shim< + to_range_t, + std::remove_cvref_t, + range_attr_t, Parser>>, + Parser, + GlobalState, + ErrorHandler, + SkipParser> + // clang-format off + [[nodiscard]] constexpr auto operator()( + R && r, + parser_interface const & + parser, + parser_interface const & skip, + F && f, + trace trace_mode = trace::off) const + // clang-format on + { + return transform_replace_view( + to_range::call((R &&) r), + parser, + skip, + utf_rvalue_shim< + to_range_t, + std::remove_cvref_t, + range_attr_t, Parser>>((F &&) f), + trace_mode); + } + + template< + parsable_range_like R, + std::move_constructible F, + typename Parser, + typename GlobalState, + typename ErrorHandler> + requires + // 
clang-format off + (std::is_pointer_v> || + std::ranges::viewable_range) && + std::regular_invocable< + F &, + range_attr_t, Parser>> && + // clang-format on + can_transform_replace_view< + to_range_t, + utf_rvalue_shim< + to_range_t, + std::remove_cvref_t, + range_attr_t, Parser>>, + Parser, + GlobalState, + ErrorHandler, + parser_interface>> + // clang-format off + [[nodiscard]] constexpr auto operator()( + R && r, + parser_interface const & + parser, + F && f, + trace trace_mode = trace::off) const + // clang-format on + { + return (*this)( + (R &&) r, + parser, + parser_interface>{}, + (F &&) f, + trace_mode); + } + +#else + + template< + typename R, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser, + typename F = trace, + typename Trace = trace, + typename Enable = std::enable_if_t>> + [[nodiscard]] constexpr auto operator()( + R && r, + parser_interface const & + parser, + SkipParser && skip, + F && f = F{}, + Trace trace_mode = Trace{}) const + { + if constexpr ( + is_parser_iface> && + std::is_invocable_v< + F &, + range_attr_t, Parser>> && + std::is_same_v) { + // (r, parser, skip, f, trace) case + return impl( + to_range::call((R &&) r), + parser, + skip, + (F &&) f, + trace_mode); + } else if constexpr ( + std::is_invocable_v< + SkipParser &, + range_attr_t, Parser>> && + std::is_same_v, trace> && + std::is_same_v) { + // (r, parser, f, trace) case + return impl( + to_range::call((R &&) r), + parser, + parser_interface>{}, + (SkipParser &&) skip, + f); + } else { + static_assert( + sizeof(R) == 1 && false, + "Only the signatures transform_replace(R, parser, skip, " + "f, trace = trace::off) and transform_replace(R, parser, " + "f, trace = trace::off) are supported."); + } + } + + private: + template< + typename R, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> + [[nodiscard]] constexpr auto impl( + R && r, + parser_interface const & + parser, + parser_interface const & 
skip, + F && f, + trace trace_mode = trace::off) const + { + return transform_replace_view( + (R &&) r, + parser, + skip, + utf_rvalue_shim< + R, + remove_cv_ref_t, + range_attr_t>((F &&) f), + trace_mode); + } + +#endif + }; + } + + /** A range adaptor object ([range.adaptor.object]). Given a range `E`, a + parser `P`, an optional skip parser `Q`, an invocable `F`, and an optional + `trace` value `T`, each of `transform_replace(E, P, F)`, + `transform_replace(E, P, F, T)`, `transform_replace(E, P, Q, F)`, and + `transform_replace(E, P, Q, F, T)` returns a `transform_replace_view` over + `E`, with `F` wrapped in a `detail::utf_rvalue_shim`. */ + inline constexpr detail::stl_interfaces::adaptor< + detail::transform_replace_impl> + transform_replace = detail::transform_replace_impl{}; + +} +#endif // !defined(_MSC_VER) || BOOST_PARSER_USE_CONCEPTS + +// TODO: Conditional borrowability. +#if 0 // BOOST_PARSER_USE_CONCEPTS +template< + typename V, + typename F, + typename Parser, + typename GlobalState, + typename ErrorHandler, + typename SkipParser> +constexpr bool std::ranges::enable_borrowed_range> = + enable_borrowed_range && enable_borrowed_range; +#endif + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9b7519cc..eb12a0f0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -50,6 +50,7 @@ add_test_executable(all_t) add_test_executable(search) add_test_executable(split) add_test_executable(replace) +add_test_executable(transform_replace) add_test_executable(hl) add_test_executable(aggr_tuple_assignment) add_test_executable(parser_lazy_params) diff --git a/test/parser.cpp b/test/parser.cpp index 1f799cbc..11d564fb 100644 --- a/test/parser.cpp +++ b/test/parser.cpp @@ -1852,6 +1852,99 @@ TEST(parser, lexeme) } } } + + { + auto const parser = string("abc"); + + // Follows the parser used in transform_replace(). 
+ auto before = [&](auto & ctx) {}; + auto after = [&](auto & ctx) {}; + auto const search_parser = + omit[*(char_ - parser)] >> + -lexeme[eps[before] >> skip[parser] >> eps[after]]; + + { + std::string str = "abc"; + std::optional result; + EXPECT_TRUE(parse(str, search_parser, char_(' '), result)); + EXPECT_EQ(*result, "abc"); + + { + std::string str = "abc"; + auto first = detail::text::detail::begin(str); + auto last = detail::text::detail::end(str); + auto const result = + prefix_parse(first, last, search_parser, char_(' ')); + static_assert(std::is_same_v< + decltype(result), + std::optional> const>); + EXPECT_TRUE(result); + EXPECT_EQ(**result, "abc"); + } + } + { + std::string str = " abc"; + std::optional result; + EXPECT_TRUE(parse(str, search_parser, char_(' '), result)); + EXPECT_EQ(*result, "abc"); + + { + std::string str = " abc"; + auto const result = parse(str, search_parser, char_(' ')); + static_assert(std::is_same_v< + decltype(result), + std::optional> const>); + EXPECT_TRUE(result); + EXPECT_EQ(**result, "abc"); + } + } + } + + { + auto const parser = int_ % ','; + + // Follows the parser used in transform_replace(). 
+ auto before = [&](auto & ctx) {}; + auto after = [&](auto & ctx) {}; + auto const search_parser = + omit[*(char_ - parser)] >> + -lexeme[eps[before] >> skip[parser] >> eps[after]]; + + { + std::string str = "1, 2, 4"; + std::optional> result; + EXPECT_TRUE(parse(str, search_parser, char_(' '), result)); + EXPECT_EQ(*result, std::vector({1, 2, 4})); + + { + std::string str = "1, 2, 4"; + auto const result = parse(str, search_parser, char_(' ')); + static_assert( + std::is_same_v< + decltype(result), + std::optional>> const>); + EXPECT_TRUE(result); + EXPECT_EQ(**result, std::vector({1, 2, 4})); + } + } + { + std::string str = " 1, 2, 4"; + std::optional> result; + EXPECT_TRUE(parse(str, search_parser, char_(' '), result)); + EXPECT_EQ(*result, std::vector({1, 2, 4})); + + { + std::string str = " 1, 2, 4"; + auto const result = parse(str, search_parser, char_(' ')); + static_assert( + std::is_same_v< + decltype(result), + std::optional>> const>); + EXPECT_TRUE(result); + EXPECT_EQ(**result, std::vector({1, 2, 4})); + } + } + } } TEST(parser, skip) diff --git a/test/replace.cpp b/test/replace.cpp index 562a8805..b9988b1a 100644 --- a/test/replace.cpp +++ b/test/replace.cpp @@ -140,7 +140,7 @@ TEST(replace, replace) #endif { char const str[] = "aaXYZbaabaXYZ"; - auto r = str | bp::replace(bp::lit("XYZ"), "foo"); + const auto r = str | bp::replace(bp::lit("XYZ"), "foo"); int count = 0; std::string_view const strs[] = {"aa", "foo", "baaba", "foo"}; for (auto subrange : r) { @@ -385,8 +385,8 @@ TEST(replace, join_compat) { char const str[] = "XYZXYZaaXYZbaabaXYZXYZ"; auto rng = str | bp::as_utf32 | - bp::replace(bp::lit("XYZ"), "foo" | bp::as_utf8) | - std::views::join; + bp::replace(bp::lit("XYZ"), "foo" | bp::as_utf8) | + std::views::join; std::string replace_result; for (auto ch : rng) { static_assert(std::is_same_v); diff --git a/test/split.cpp b/test/split.cpp index dd79a637..8ea55c64 100644 --- a/test/split.cpp +++ b/test/split.cpp @@ -189,7 +189,7 @@ TEST(split, 
split_unicode) { char const str_[] = "aaXYZbaabaXYZ"; auto str = str_ | bp::as_utf8; - auto r = str | bp::split(bp::lit("XYZ"), bp::trace::off); + const auto r = str | bp::split(bp::lit("XYZ"), bp::trace::off); int count = 0; int const offsets[] = {0, 2, 5, 10, 13, 13}; for (auto subrange : r) { diff --git a/test/transform_replace.cpp b/test/transform_replace.cpp new file mode 100644 index 00000000..74d5cb5c --- /dev/null +++ b/test/transform_replace.cpp @@ -0,0 +1,835 @@ +/** + * Copyright (C) 2024 T. Zachary Laine + * + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#include + +#include + +#include "ill_formed.hpp" + +#include + +#if !defined(_MSC_VER) || BOOST_PARSER_USE_CONCEPTS + +namespace bp = boost::parser; + +auto f_str = [](std::vector const & ints) { + std::string retval; + for (auto x : ints) { + retval += std::to_string(x); + retval += '_'; + } + return retval; +}; +auto f_str_ref = [](std::vector const & ints) -> std::string & { + static std::string retval; + for (auto x : ints) { + retval += std::to_string(x); + retval += '_'; + } + return retval; +}; + +#if BOOST_PARSER_USE_CONCEPTS +namespace deduction { + using namespace std::literals; + std::string str; + auto const parser = bp::int_ % ','; + auto const skip = bp::ws; + using attr_t = std::vector; + + auto deduced_1 = bp::transform_replace_view( + str, + parser, + skip, + bp::detail::utf_rvalue_shim( + f_str), + bp::trace::on); + auto deduced_2 = bp::transform_replace_view( + str, + parser, + skip, + bp::detail::utf_rvalue_shim( + f_str)); + auto deduced_3 = bp::transform_replace_view( + str, + parser, + bp::detail::utf_rvalue_shim( + f_str), + bp::trace::on); + auto deduced_4 = bp::transform_replace_view( + str, + parser, + bp::detail::utf_rvalue_shim( + f_str)); +} +#endif + +namespace detail_attr_type { + constexpr auto int_char_p = bp::int_ >> bp::char_; + + static_assert( + 
std::is_same_v< + bp::detail::range_attr_t, + bp::tuple>); + + static_assert( + std::is_same_v< + bp::detail:: + range_attr_t, + bp::tuple>); + + constexpr auto ints_p = *bp::int_; + static_assert( + std::is_same_v< + bp::detail::range_attr_t, + std::vector>); +} + +#if defined(__cpp_char8_t) +auto f_u8str = [](std::vector ints) { + std::u8string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; +// NOTE: *const* & return type! +auto f_u8str_ref = [](std::vector ints) -> std::u8string const & { + static std::u8string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; +#endif +auto f_u16str = [](std::vector ints) { + std::u16string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; +auto f_u16str_ref = [](std::vector ints) -> std::u16string & { + static std::u16string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; +auto f_u32str = [](std::vector ints) { + std::u32string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; +auto f_u32str_ref = [](std::vector ints) -> std::u32string & { + static std::u32string retval; + for (auto x : ints) { + auto const s = std::to_string(x); + retval.insert(retval.end(), s.begin(), s.end()); + retval += '_'; + } + return retval; +}; + +namespace detail_utf_rvalue_shim { + constexpr auto ints_p = *bp::int_; + + using attr_t = std::vector; + + // char -> char + + bp::detail::utf_rvalue_shim + char_char_shim(f_str); + static_assert( + std::is_same_v); + 
static_assert(bp::detail::transform_replacement_for< + decltype(char_char_shim), + std::string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + char_char_ref_shim(f_str_ref); + static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(char_char_ref_shim), + std::string, + decltype(ints_p.parser_)>); + +#if defined(__cpp_char8_t) && BOOST_PARSER_USE_CONCEPTS + // char8_t -> char8_t + + bp::detail::utf_rvalue_shim + u8_u8_shim(f_u8str); + static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(u8_u8_shim), + std::u8string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u8_u8_ref_shim(f_u8str_ref); + static_assert(std::is_same_v< + decltype(u8_u8_ref_shim(attr_t{})), + std::u8string const &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u8_u8_ref_shim), + std::u8string, + decltype(ints_p.parser_)>); + + // char8_t -> char16_t + + bp::detail::utf_rvalue_shim + u8_u16_shim(f_u16str); + static_assert(std::is_same_v< + decltype(u8_u16_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf8, + std::ranges::owning_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u8_u16_shim), + std::u8string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u8_u16_ref_shim(f_u16str_ref); + static_assert(std::is_same_v< + decltype(u8_u16_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf8, + std::ranges::ref_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u8_u16_ref_shim), + std::u8string, + decltype(ints_p.parser_)>); + + // char8_t -> char32_t + + bp::detail::utf_rvalue_shim + u8_u32_shim(f_u32str); + static_assert(std::is_same_v< + decltype(u8_u32_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf8, + std::ranges::owning_view> &>); + static_assert(bp::detail::transform_replacement_for< + 
decltype(u8_u32_shim), + std::u8string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u8_u32_ref_shim(f_u32str_ref); + static_assert(std::is_same_v< + decltype(u8_u32_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf8, + std::ranges::ref_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u8_u32_ref_shim), + std::u8string, + decltype(ints_p.parser_)>); + + // char16_t -> char8_t + + bp::detail::utf_rvalue_shim + u16_u8_shim(f_u8str); + static_assert(std::is_same_v< + decltype(u16_u8_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf16, + std::ranges::owning_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u8_shim), + std::u16string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u16_u8_ref_shim(f_u8str_ref); + static_assert(std::is_same_v< + decltype(u16_u8_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf16, + std::ranges::ref_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u8_ref_shim), + std::u16string, + decltype(ints_p.parser_)>); + + // char32_t -> char8_t + + bp::detail::utf_rvalue_shim + u32_u8_shim(f_u8str); + static_assert(std::is_same_v< + decltype(u32_u8_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf32, + std::ranges::owning_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u8_shim), + std::u32string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u32_u8_ref_shim(f_u8str_ref); + static_assert(std::is_same_v< + decltype(u32_u8_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf32, + std::ranges::ref_view> &>); + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u8_ref_shim), + std::u32string, + decltype(ints_p.parser_)>); +#endif + // char16_t -> char16_t + + bp::detail::utf_rvalue_shim + u16_u16_shim(f_u16str); 
+ static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u16_shim), + std::u16string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u16_u16_ref_shim(f_u16str_ref); + static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u16_ref_shim), + std::u16string, + decltype(ints_p.parser_)>); + + // char16_t -> char32_t + + bp::detail::utf_rvalue_shim + u16_u32_shim(f_u32str); +#if BOOST_PARSER_USE_CONCEPTS + static_assert(std::is_same_v< + decltype(u16_u32_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf16, + std::ranges::owning_view> &>); +#else + static_assert( + std::is_same_v< + decltype(u16_u32_shim(attr_t{})), + bp::detail::text::utf16_view< + bp::detail::text::detail::owning_view> &>); +#endif + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u32_shim), + std::u16string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u16_u32_ref_shim(f_u32str_ref); +#if BOOST_PARSER_USE_CONCEPTS + static_assert(std::is_same_v< + decltype(u16_u32_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf16, + std::ranges::ref_view> &>); +#else + static_assert(std::is_same_v< + decltype(u16_u32_ref_shim(attr_t{})), + bp::detail::text::utf16_view< + bp::detail::text::detail::ref_view> &>); +#endif + static_assert(bp::detail::transform_replacement_for< + decltype(u16_u32_ref_shim), + std::u16string, + decltype(ints_p.parser_)>); + + // char32_t -> char32_t + + bp::detail::utf_rvalue_shim + u32_u32_shim(f_u32str); + static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u32_shim), + std::u32string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u32_u32_ref_shim(f_u32str_ref); + static_assert( + std::is_same_v); + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u32_ref_shim), + std::u32string, + 
decltype(ints_p.parser_)>); + + // char32_t -> char16_t + + bp::detail::utf_rvalue_shim + u32_u16_shim(f_u16str); +#if BOOST_PARSER_USE_CONCEPTS + static_assert(std::is_same_v< + decltype(u32_u16_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf32, + std::ranges::owning_view> &>); +#else + static_assert( + std::is_same_v< + decltype(u32_u16_shim(attr_t{})), + bp::detail::text::utf32_view< + bp::detail::text::detail::owning_view> &>); +#endif + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u16_shim), + std::u32string, + decltype(ints_p.parser_)>); + + bp::detail::utf_rvalue_shim + u32_u16_ref_shim(f_u16str_ref); +#if BOOST_PARSER_USE_CONCEPTS + static_assert(std::is_same_v< + decltype(u32_u16_ref_shim(attr_t{})), + bp::detail::text::utf_view< + bp::detail::text::format::utf32, + std::ranges::ref_view> &>); +#else + static_assert(std::is_same_v< + decltype(u32_u16_ref_shim(attr_t{})), + bp::detail::text::utf32_view< + bp::detail::text::detail::ref_view> &>); +#endif + static_assert(bp::detail::transform_replacement_for< + decltype(u32_u16_ref_shim), + std::u32string, + decltype(ints_p.parser_)>); +} + +TEST(transform_replace, detail_attr_search_repack_shim) +{ + using namespace bp::literals; + + { + std::string str = ""; + auto parser = bp::string("XYZ"); + + // Follows body of attr_search_impl() that constructs a custom parser + // from the given one. 
+ auto first = bp::detail::text::detail::begin(str); + auto const last = bp::detail::text::detail::end(str); + auto match_first = first; + auto match_last = first; + auto before = [&match_first](auto & ctx) { + match_first = _where(ctx).begin(); + }; + auto after = [&match_last](auto & ctx) { + match_last = _where(ctx).begin(); + }; + auto const search_parser = + bp::omit[*(bp::char_ - parser)] >> + -bp::lexeme[bp::eps[before] >> bp::skip[parser] >> bp::eps[after]]; + + auto result = bp::prefix_parse( + first, last, search_parser, bp::ws, bp::trace::off); + static_assert(std::is_same_v< + decltype(result), + std::optional>>); + static_assert(std::is_same_v< + decltype(bp::prefix_parse( + first, last, search_parser, bp::ws, bp::trace::off)), + std::optional>>); + } + { + std::string str = ""; + auto parser = bp::string("XYZ"); + bp::detail::attr_search_impl(str, parser, bp::ws, bp::trace::off); + } + { + std::string str = ""; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), std::begin(str)); + EXPECT_EQ(subrng.end(), std::begin(str)); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, ""); + } + { + char const str[] = "not here"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), std::end(str)); + EXPECT_EQ(subrng.end(), std::end(str)); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, ""); + } + { + char const str[] = "aaXYZb"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 2); + EXPECT_EQ(subrng.end(), str + 5); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } + { + char const str[] = "XYZab"; + auto result = 
bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 0); + EXPECT_EQ(subrng.end(), str + 3); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } + { + char const str[] = "gbXYZ"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 2); + EXPECT_EQ(subrng.end(), str + 5); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } + { + char const str[] = "XYZ"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 0); + EXPECT_EQ(subrng.end(), str + 3); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } + { + char const str[] = "XXYZZ"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 1); + EXPECT_EQ(subrng.end(), str + 4); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } + { + char const str[] = "XXYZZ"; + auto result = bp::detail::attr_search_repack_shim( + str, bp::string("XYZ"), bp::ws, bp::trace::off); + auto subrng = bp::get(result, 0_c); + EXPECT_EQ(subrng.begin(), str + 1); + EXPECT_EQ(subrng.end(), str + 4); + auto result_str = bp::get(result, 1_c); + EXPECT_EQ(result_str, "XYZ"); + } +} + +TEST(transform_replace, transform_replace) +{ + { + auto r = bp::transform_replace("", bp::int_ % ',', bp::ws, f_str); + int count = 0; + for (auto subrange : r) { + (void)subrange; + ++count; + } + EXPECT_EQ(count, 0); + } + { + char const str[] = "ab c 1, 2, 3 d e f"; + auto r = bp::transform_replace(str, bp::int_ % ',', bp::ws, f_str); + int count = 0; + std::string replace_result; + for (auto 
subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(count, 3); + EXPECT_EQ(replace_result, "ab c 1_2_3_ d e f"); + } + { + char const str[] = "ab c 1, 2, 3 d e f"; + auto r = bp::transform_replace(str, bp::int_ % ',', bp::ws, f_str_ref); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(count, 3); + EXPECT_EQ(replace_result, "ab c 1_2_3_ d e f"); + } + { + char const str[] = "a a 1,2,3baa ba1 ,2 , 3"; + auto r = str | bp::transform_replace( + bp::int_ % ',', bp::ws, f_str, bp::trace::off); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "a a 1_2_3_baa ba1_2_3_"); + EXPECT_EQ(count, 4); + } + { + char const str[] = "aa1,2,3baaba1,2,3 4,5,6"; + auto r = str | bp::transform_replace(bp::int_ % ',', f_str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "aa1_2_3_baaba1_2_3_ 4_5_6_"); + EXPECT_EQ(count, 6); + } + { + char const str[] = "0,0aa1,2,3baaba1,2,3 4,5,6"; + auto r = str | bp::transform_replace(bp::int_ % ',', f_str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "0_0_aa1_2_3_baaba1_2_3_ 4_5_6_"); + EXPECT_EQ(count, 7); + } + { + char const str[] = "88,88 0,0aa1,2,3baaba1,2,3 4,5,6"; + auto r = str | bp::transform_replace(bp::int_ % ',', f_str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + 
EXPECT_EQ(replace_result, "88_88_ 0_0_aa1_2_3_baaba1_2_3_ 4_5_6_"); + EXPECT_EQ(count, 9); + } +} + +TEST(transform_replace, transform_replace_unicode) +{ + { + char const str_[] = ""; + auto str = str_ | bp::as_utf8; + auto r = bp::transform_replace(str, bp::int_ % ',', bp::ws, f_u16str); + int count = 0; + for (auto subrange : r) { + (void)subrange; + ++count; + } + EXPECT_EQ(count, 0); + } + { + char const * str_ = "aa2,3,4b"; + auto str = str_ | bp::as_utf16; + auto r = bp::transform_replace(str, bp::int_ % ',', bp::ws, f_u16str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "aa2_3_4_b"); + EXPECT_EQ(count, 3); + } + { + char const str_[] = "a a 3,4,5 baaba7, 8 ,9"; + auto str = str_ | bp::as_utf32; + auto r = str | bp::transform_replace( + bp::int_ % ',', bp::ws, f_u32str, bp::trace::off); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "a a 3_4_5_ baaba7_8_9_"); + EXPECT_EQ(count, 4); + } + { + char const str_[] = "aa88,99baaba111,2222"; + auto str = str_ | bp::as_utf8; + const auto r = str | bp::transform_replace( + bp::int_ % ',', f_u16str, bp::trace::off); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "aa88_99_baaba111_2222_"); + EXPECT_EQ(count, 4); + } + { + char const str_[] = "aa88,99baaba111,2222"; + auto str = str_ | bp::as_utf16; + auto r = str | bp::transform_replace(bp::int_ % ',', f_u32str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, 
"aa88_99_baaba111_2222_"); + EXPECT_EQ(count, 4); + } + { + char const str_[] = "aa88,99baaba111,2222 3,4"; + auto str = str_ | bp::as_utf32; + auto r = str | bp::transform_replace(bp::int_ % ',', f_u16str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "aa88_99_baaba111_2222_ 3_4_"); + EXPECT_EQ(count, 6); + } + { + char const str_[] = "1aa88,99baaba111,2222 3,4"; + auto str = str_ | bp::as_utf8; + auto r = str | bp::transform_replace(bp::int_ % ',', f_u32str); + int count = 0; + std::string replace_result; + for (auto subrange : r) { + std::string str(subrange.begin(), subrange.end()); + replace_result += str; + ++count; + } + EXPECT_EQ(replace_result, "1_aa88_99_baaba111_2222_ 3_4_"); + EXPECT_EQ(count, 7); + } +} + +#if BOOST_PARSER_USE_CONCEPTS && (!defined(__GNUC__) || 12 <= __GNUC__) +// Older GCCs don't like the use of temporaries like the std::string("foo") +// below. This causes | join to break. 
+// Verifies that the views produced by bp::transform_replace can be piped into
+// std::views::join, for UTF-transcoded inputs, plain char inputs (array,
+// lvalue std::string, const std::string) and a temporary std::string.  Each
+// case flattens the joined view into a std::string and compares it with the
+// expected text.
+TEST(transform_replace, join_compat)
+{
+    {
+        // UTF-16 input, UTF-32 replacement.
+        char const str_[] = "1aa88,99baaba111,2222 3,4";
+        auto str = str_ | bp::as_utf16;
+        auto rng = str | bp::transform_replace(bp::int_ % ',', f_u32str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            // The joined elements are expected to be code units of the
+            // input's encoding, not of the replacement's.
+            static_assert(std::is_same_v<decltype(ch), char16_t>);
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+#if defined(__cpp_char8_t)
+    // f_u8str is only defined when char8_t is available, so this case is
+    // guarded the same way.
+    {
+        // UTF-32 input, UTF-8 replacement.
+        char const str[] = "1aa88,99baaba111,2222 3,4";
+        auto rng = str | bp::as_utf32 |
+                   bp::transform_replace(bp::int_ % ',', f_u8str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            static_assert(std::is_same_v<decltype(ch), char32_t>);
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+#endif
+
+    {
+        // Plain char array input.
+        char const str[] = "1aa88,99baaba111,2222 3,4";
+        auto rng = str | bp::transform_replace(bp::int_ % ',', f_str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+    {
+        // Mutable lvalue std::string input.
+        std::string str = "1aa88,99baaba111,2222 3,4";
+        auto rng = str | bp::transform_replace(bp::int_ % ',', f_str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+    {
+        // Const lvalue std::string input.
+        std::string const str = "1aa88,99baaba111,2222 3,4";
+        auto rng = str | bp::transform_replace(bp::int_ % ',', f_str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+    {
+        // Temporary (rvalue) std::string input.
+        auto rng = std::string("1aa88,99baaba111,2222 3,4") |
+                   bp::transform_replace(bp::int_ % ',', f_str) |
+                   std::views::join;
+        std::string transform_replace_result;
+        for (auto ch : rng) {
+            transform_replace_result.push_back(ch);
+        }
+        EXPECT_EQ(transform_replace_result, "1_aa88_99_baaba111_2222_ 3_4_");
+    }
+}
+#endif // BOOST_PARSER_USE_CONCEPTS && (!defined(__GNUC__) || 12 <= __GNUC__)
+
+// Exercises the transform_replace example: a bracketed, comma-separated list
+// of ints is replaced by the decimal string of their sum.
+TEST(transform_replace, doc_examples)
+{
+    {
+        // Maps the parsed attribute (the list of ints) to its sum as text.
+        auto string_sum = [](std::vector<int> const & ints) {
+            return std::to_string(std::accumulate(ints.begin(), ints.end(), 0));
+        };
+
+        auto rng = "There are groups of [1, 2, 3, 4, 5] in the set." |
+                   bp::transform_replace(
+                       '[' >> bp::int_ % ',' >> ']', bp::ws, string_sum);
+        int count = 0;
+        // Prints "There are groups of 15 in the set."
+        for (auto subrange : rng) {
+            for (auto ch : subrange) {
+                std::cout << ch;
+            }
+            ++count;
+        }
+        std::cout << "\n";
+        // Three subranges are expected -- presumably the text before the
+        // match, the replacement, and the text after it.  EXPECT_EQ is used
+        // instead of assert() so the check still runs in NDEBUG builds and a
+        // failure is reported by the test framework rather than aborting.
+        EXPECT_EQ(count, 3);
+    }
+}
+#endif // !defined(_MSC_VER) || BOOST_PARSER_USE_CONCEPTS