From 85320bd6d7c02781480d9e271b05ac7c91c8c185 Mon Sep 17 00:00:00 2001
From: Zach Laine
Date: Sun, 15 Feb 2026 15:20:27 -0600
Subject: [PATCH] Add overloads of repeat() that take a parser param; the given
 parser is used as a separator. Implements #231.

---
Note: this copy was re-flowed from a whitespace-collapsed extraction of the
patch. Angle-bracket template lists that the extraction dropped have been
restored, and context-line indentation is reconstructed; the author e-mail
address was not recoverable. Apply with whitespace tolerance if needed.

 doc/tables.qbk                  |  10 ++
 include/boost/parser/parser.hpp | 102 +++++++----
 test/parser.cpp                 | 288 ++++++++++++++++++++++++++++++++
 3 files changed, 365 insertions(+), 35 deletions(-)

diff --git a/doc/tables.qbk b/doc/tables.qbk
index a3bea4ea..a70c846e 100644
--- a/doc/tables.qbk
+++ b/doc/tables.qbk
@@ -317,6 +317,16 @@ the input they match unless otherwise stated in the table below.]
      [ `std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p)>` ]
      [ The special value _inf_ may be used for the upper bound; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` and `decltype(_RES_np_(arg1))` each must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(n, _inf_)[_e_]` (this applies to unconditional _e_ only). ]]
 
+    [[ `_rpt_np_(arg0, p1)[p2]` ]
+     [ Matches iff `p2` matches exactly `_RES_np_(arg0)` times, matching `p1` in between each pair of consecutive matches of `p2`. Equivalent to: _e_ for `_RES_np_(arg0) <= 0`, and `_rpt_np_(_RES_np_(arg0) - 1)[p2 >> p1] >> p2` otherwise. ]
+     [ `std::string` if `_ATTR_np_(p2)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p2)>` ]
+     [ The special value _inf_ may be used; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(_inf_)[_e_]` (this applies to unconditional _e_ only). ]]
+
+    [[ `_rpt_np_(arg0, arg1, p1)[p2]` ]
+     [ Matches iff `p2` matches between `_RES_np_(arg0)` and `_RES_np_(arg1)` times, inclusively, matching `p1` in between each pair of consecutive matches of `p2`. Equivalent to: _e_ for `_RES_np_(arg1) <= 0`, and `_rpt_np_(_RES_np_(arg0) - 1, _RES_np_(arg1) - 1)[p2 >> p1] >> p2` otherwise. ]
+     [ `std::string` if `_ATTR_np_(p2)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p2)>` ]
+     [ The special value _inf_ may be used for the upper bound; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` and `decltype(_RES_np_(arg1))` each must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(n, _inf_)[_e_]` (this applies to unconditional _e_ only). ]]
+
     [[ `_if_np_(pred)[p]` ]
      [ Equivalent to `_e_(pred) >> p`. ]
      [ `_ATTR_np_(p)` ]
diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp
index 9b256abc..1c5585f3 100644
--- a/include/boost/parser/parser.hpp
+++ b/include/boost/parser/parser.hpp
@@ -3212,18 +3212,44 @@ namespace boost { namespace parser {
 
                 int64_t count = 0;
 
-                for (int64_t end = detail::resolve(context, min_); count != end;
-                     ++count) {
+                auto const iteration = [&](auto prev_first, auto on_fail) {
+                    if constexpr (!detail::is_nope_v<DelimiterParser>) {
+                        if (count) {
+                            detail::skip(first, last, skip, flags);
+                            delimiter_parser_.call(
+                                first,
+                                last,
+                                context,
+                                skip,
+                                detail::disable_attrs(flags),
+                                success);
+                            if (!success) {
+                                on_fail(prev_first);
+                                return false;
+                            }
+                        }
+                    }
+
                     detail::skip(first, last, skip, flags);
                     attr_t attr{};
                     parser_.call(
                         first, last, context, skip, flags, success, attr);
                     if (!success) {
-                        detail::assign(retval, Attribute());
-                        return;
+                        on_fail(prev_first);
+                        return false;
                     }
                     detail::move_back(
                         retval, std::move(attr), detail::gen_attrs(flags));
+                    return true;
+                };
+
+                for (int64_t end = detail::resolve(context, min_); count != end;
+                     ++count) {
+                    if (!iteration(first, [&](auto prev_first) {
+                            detail::assign(retval, Attribute());
+                        })) {
+                        return;
+                    }
                 }
 
                 int64_t const end = detail::resolve(context, max_);
@@ -3234,37 +3260,12 @@ namespace boost { namespace parser {
                     !detail::is_unconditional_eps<Parser>{} || end < Inf);
 
                 for (; count != end; ++count) {
-                    auto const prev_first = first;
-                    // This is only ever used in delimited_parser, which
-                    // always has a min=1; we therefore know we're after a
-                    // previous element when this executes.
-                    if constexpr (!detail::is_nope_v<DelimiterParser>) {
-                        detail::skip(first, last, skip, flags);
-                        delimiter_parser_.call(
-                            first,
-                            last,
-                            context,
-                            skip,
-                            detail::disable_attrs(flags),
-                            success);
-                        if (!success) {
+                    if (!iteration(first, [&](auto prev_first) {
                             success = true;
                             first = prev_first;
-                            break;
-                        }
+                        })) {
+                        return;
                     }
-
-                    detail::skip(first, last, skip, flags);
-                    attr_t attr{};
-                    parser_.call(
-                        first, last, context, skip, flags, success, attr);
-                    if (!success) {
-                        success = true;
-                        first = prev_first;
-                        break;
-                    }
-                    detail::move_back(
-                        retval, std::move(attr), detail::gen_attrs(flags));
                 }
             }
         }
@@ -6588,20 +6589,24 @@ namespace boost { namespace parser {
 
     /** Represents a `repeat_parser` as a directive
         (e.g. `repeat[other_parser]`). */
-    template<typename MinType, typename MaxType>
+    template<
+        typename MinType,
+        typename MaxType,
+        typename DelimiterParser = detail::nope>
     struct repeat_directive
     {
         template<typename Parser2>
         constexpr auto operator[](parser_interface<Parser2> rhs) const noexcept
         {
             using repeat_parser_type =
-                repeat_parser<Parser2, detail::nope, MinType, MaxType>;
+                repeat_parser<Parser2, DelimiterParser, MinType, MaxType>;
             return parser_interface{
-                repeat_parser_type{rhs.parser_, min_, max_}};
+                repeat_parser_type{rhs.parser_, min_, max_, delimiter_}};
         }
 
         MinType min_;
         MaxType max_;
+        DelimiterParser delimiter_;
     };
 
     /** Returns a `repeat_directive` that repeats exactly `n` times, and whose
@@ -6613,6 +6618,18 @@ namespace boost { namespace parser {
         return repeat_directive<T, T>{n, n};
     }
 
+    /** Returns a `repeat_directive` that repeats exactly `n` times, where the
+        items parsed are delimited by `DelimiterParser`. The value returned
+        has an `operator[]` that returns a
+        `parser_interface<repeat_parser<P>>` from a given parser of type
+        `parser_interface<P>`. */
+    template<typename T, typename DelimiterParser>
+    inline repeat_directive<T, T, DelimiterParser>
+    repeat(T n, parser_interface<DelimiterParser> sep) noexcept
+    {
+        return repeat_directive<T, T, DelimiterParser>{n, n, sep.parser_};
+    }
+
     /** Returns a `repeat_directive` that repeats between `min_` and `max_`
         times, inclusive, and whose `operator[]` returns a
         `parser_interface<repeat_parser<P>>` from a given parser of type
@@ -6624,6 +6641,21 @@ namespace boost { namespace parser {
         return repeat_directive<MinType, MaxType>{min_, max_};
     }
 
+    /** Returns a `repeat_directive` that repeats between `min_` and `max_`
+        times, inclusive, where the items parsed are delimited by
+        `DelimiterParser`. The value returned has an `operator[]` that
+        returns a `parser_interface<repeat_parser<P>>` from a given parser of
+        type `parser_interface<P>`. */
+    template<typename MinType, typename MaxType, typename DelimiterParser>
+    inline repeat_directive<MinType, MaxType, DelimiterParser> repeat(
+        MinType min_,
+        MaxType max_,
+        parser_interface<DelimiterParser> sep) noexcept
+    {
+        return repeat_directive<MinType, MaxType, DelimiterParser>{
+            min_, max_, sep.parser_};
+    }
+
     /** A directive that represents a `perm_parser`, where the items parsed
         are delimited by `DelimiterParser` (e.g.
         `delimiter(delimter_parser)[some_perm_parser]`). This directive
diff --git a/test/parser.cpp b/test/parser.cpp
index 9ea882e2..ff87d47e 100644
--- a/test/parser.cpp
+++ b/test/parser.cpp
@@ -1092,6 +1092,72 @@ int main()
 
     // repeat
     {
+        {
+            constexpr auto parser = repeat(2)[string("zs")];
+
+            {
+                std::string str = "";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "z";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zszs";
+                std::vector<std::string> chars;
+                BOOST_TEST(parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>({"zs", "zs"}));
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(chars);
+                    BOOST_TEST(
+                        *chars == std::vector<std::string>({"zs", "zs"}));
+                }
+            }
+            {
+                std::string str = "zszszs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+        }
         {
             constexpr auto parser = repeat(2, 3)[string("zs")];
 
@@ -1145,6 +1211,228 @@ int main()
                         *chars == std::vector<std::string>({"zs", "zs"}));
                 }
             }
+            {
+                std::string str = "zszszs";
+                std::vector<std::string> chars;
+                BOOST_TEST(parse(str, parser, chars));
+                BOOST_TEST(
+                    chars == std::vector<std::string>({"zs", "zs", "zs"}));
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(chars);
+                    BOOST_TEST(
+                        *chars == std::vector<std::string>({"zs", "zs", "zs"}));
+                }
+            }
+            {
+                std::string str = "zszszszs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+        }
+        {
+            auto parser = repeat(2, char_(','))[string("zs")];
+
+            {
+                std::string str = "";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "z";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zszs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>());
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs,zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>({"zs", "zs"}));
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(chars);
+                    BOOST_TEST(
+                        *chars == std::vector<std::string>({"zs", "zs"}));
+                }
+            }
+            {
+                std::string str = "zs,zs,";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>());
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs,zs,zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>());
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+        }
+        {
+            auto parser = repeat(2, 3, char_(','))[string("zs")];
+
+            {
+                std::string str = "";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "z";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs,";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs,zs,";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
+            {
+                std::string str = "zs,zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>({"zs", "zs"}));
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(chars);
+                    BOOST_TEST(
+                        *chars == std::vector<std::string>({"zs", "zs"}));
+                }
+            }
+            {
+                std::string str = "zs,zs,zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(parse(str, parser, chars));
+                BOOST_TEST(
+                    chars == std::vector<std::string>({"zs", "zs", "zs"}));
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(chars);
+                    BOOST_TEST(
+                        *chars == std::vector<std::string>({"zs", "zs", "zs"}));
+                }
+            }
+            {
+                std::string str = "zs,zs,zs,zs";
+                std::vector<std::string> chars;
+                BOOST_TEST(!parse(str, parser, chars));
+                BOOST_TEST(chars == std::vector<std::string>{});
+
+                {
+                    std::optional<std::vector<std::string>> const chars =
+                        parse(str, parser);
+                    BOOST_TEST(!chars);
+                }
+            }
         }
         {
             constexpr auto parser = *char_ >> eps >> *string("str");