2
0
mirror of https://github.com/boostorg/parser.git synced 2026-01-20 04:42:22 +00:00

21 Commits

Author SHA1 Message Date
Zach Laine
c674e94c3d Don't reuse the attribute-generating path in rule_parser's out-param overload
of call.  Doing so was wiping out previous partial results, in cases like foo
>> bar, where foo produces a T, and bar is a rule that produces vector<T>.

Fixes #248.
2025-07-27 17:50:38 -05:00
Zach Laine
84ee288b02 Attempt to fix odd error in happy path of code submitted with issue 223. 2025-07-26 21:13:33 -05:00
Zach Laine
39faa9ddbe Pass the sentinel type as a template parameter to the iterator template in
project_view, as a workaround to the presence/absence of a disambiguating
template keyword in iterator's implementation.  Neither adding it nor temoving
it works for all builds.  Also, re-enable the C++17 MSVC 2022 Github build.

Fixes #252.
2025-07-26 21:12:37 -05:00
Zach Laine
b2927abc6c Disable C++17 on MSVC 2022 in Github CI. 2025-07-26 20:17:56 -05:00
Zach Laine
5d6d2f7b84 Add missing special case for parsing a sequence of optional<T>s, writing the
results into a sequence container of Ts.

Fixes #223.
2025-07-26 20:15:15 -05:00
Zach Laine
fd6c56df1b Publicize project_view::{interator,sentinel} in attempt to fix VS 2022 build. 2025-07-13 15:56:35 -05:00
Zach Laine
af41e6a7c2 Add missing template keyword disambiguator in attempt to fix VS 2022 build. 2025-07-13 15:34:21 -05:00
Zach Laine
0b93a586f1 Use an R-string instead of using so many backslashes in the quoted string
examples.

Fixes #239.
2025-07-12 16:08:53 -05:00
Zach Laine
ed9a06123b Comment out unused dont_assign param in second overload defined by
BOOST_PARSER_DEFINE_IMPL.

Fixes #237.
2025-07-12 15:15:52 -05:00
Zach Laine
8ff46f394a Spelling corrections in tutorial.
Fixes #238.
2025-07-12 15:13:20 -05:00
Zach Laine
8c9ad7bdb3 Document why there are no Spirit-style charater class parsers (alnum, punct,
etc.) in the Rationale section of the docs.

Fixes #224.
2025-07-12 15:01:46 -05:00
Adem Budak
d8abe8f29e Fix some typos on documentation 2025-07-12 14:46:28 -05:00
Zach Laine
810adb43f6 Use a move asssignment instead of a copy assignment when returning a result
via detail::make_parse_result().

Supercedes PR #247.
2025-07-12 14:42:47 -05:00
Zach Laine
5788fb6967 Add missing 'template ' after dot when naming a dependent template
instantiation.

Fixes #221.
2025-05-06 01:55:23 -05:00
Rene Rivera
ec7df8a0af Add support for modular build structure. 2025-05-06 01:54:32 -05:00
Zach Laine
a93a1d2647 Use detail::hl::make_tuple() instead of CTAD in test to fix build breakage on
some compilers.
2025-04-13 14:26:47 -05:00
Zach Laine
927f35f115 Provide a way to specify radix, and min/max digits for {u,}int_parser, without
using the template parameters directly, since this also requires the user to
type parser_interface.

Fixes #220.
2025-04-12 19:59:46 -05:00
Zach Laine
87617fdec0 std::tuple -> tuple in test to fix build with -DBUILD_WITH_HANA=true. 2025-04-12 19:33:11 -05:00
Zach Laine
ead639e630 Add missing cxxstd to meta/libraries.json. 2025-04-12 13:56:00 -05:00
Zach Laine
a3ca1193b2 Add error reporting when encountering unexpected (left over) code points at
the end of an otherwise-successful parse, when doing non-prefix parsing.
2025-03-30 16:06:41 -05:00
Zach Laine
07153117ff Doc copy editing.
Fixes #217.
2025-03-30 16:06:02 -05:00
16 changed files with 436 additions and 99 deletions

24
build.jam Normal file
View File

@@ -0,0 +1,24 @@
# Copyright René Ferdinand Rivera Morell 2025
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
# NOTE(review): presumably declares the minimum B2 version (5.2) needed to
# process this file -- confirm against the B2 documentation.
require-b2 5.2 ;
# The Boost library targets this library depends on.  The list is attached to
# the boost_parser alias below via <library>.
constant boost_dependencies :
/boost/assert//boost_assert
/boost/charconv//boost_charconv
/boost/hana//boost_hana
/boost/type_index//boost_type_index ;
# Declares this directory as the project with id /boost/parser.
project /boost/parser
;
# Two alias targets, both marked explicit:
#  - boost_parser: carries no sources; it only supplies the dependency
#    libraries and the "include" directory (in what appears to be the
#    usage-requirements slot of alias -- TODO confirm argument position).
#  - all: groups boost_parser together with the test target.
explicit
[ alias boost_parser : : :
: <library>$(boost_dependencies) <include>include ]
[ alias all : boost_parser test ]
;
# NOTE(review): presumably invokes the boost-library rule for "parser" only
# when that rule is available (i.e. when built as part of the modular Boost
# tree) -- verify against the Boost build infrastructure.
call-if : boost-library parser
;

View File

@@ -39,7 +39,7 @@ rule run_doxygen ( files * : name : expand ? )
}
run_doxygen [ glob $(here)/../../../boost/parser/*.hpp : $(here)/../../../boost/parser/concepts.hpp ] : "Headers" ;
run_doxygen [ glob $(here)/../include/boost/parser/*.hpp : $(here)/../include/boost/parser/concepts.hpp ] : "Headers" ;
install images_standalone : [ glob *.png ] : <location>html/parser/img ;
explicit images_standalone ;

View File

@@ -325,4 +325,24 @@ always equal to `A()` if the parser fails. It is equal to whatever the parser
sets it to _emdash_ or its previous value, if the parser does not mutate it
_emdash_ if the parse succeeds.
[heading There are no _Spirit_-style character class parsers]
_Spirit_ has these character class parsers that recognize the same set of
characters as the C standard library's character class functions. For
instance, _Spirit_'s `alnum` recognizes the characters recognized by
`std::isalnum()`, its `punct` recognizes the characters recognized by
`std::ispunct()`, etc.
The problem with this is that those `std::is*()` functions are badly broken.
They do not even work correctly for ASCII values. This is because they use
the C standard library's locale mechanism, which can be set to anything the
current platform supports, and can be set by any code anywhere in your
program; the locale is mutable global state. So, even if you use the default
"C" locale in your program, if you link against a library that sets the locale
to something that breaks ASCII character recognition (an EBCDIC locale, for
instance), your program is now incorrect, regardless of the code you wrote.
For this reason, I firmly believe that no one, anywhere, should use those C
functions in production code, and I am not supporting their use via _Parser_.
[endsect]

View File

@@ -230,7 +230,7 @@ the input they match unless otherwise stated in the table below.]
[[ _ui_ ]
[ Matches an unsigned integral value. ]
[ `unsigned int` ]
[]]
[ To specify a base/radix of `N`, use _ui_`.base<N>()`. To specify exactly `D` digits, use _ui_`.digits<D>()`. To specify a minimum of `LO` digits and a maximum of `HI` digits, use _ui_`.digits<LO, HI>()`. These calls can be chained, as in _ui_`.base<2>().digits<8>()`. ]]
[[ `_ui_(arg0)` ]
[ Matches exactly the unsigned integral value `_RES_np_(arg0)`. ]
@@ -270,7 +270,7 @@ the input they match unless otherwise stated in the table below.]
[[ _i_ ]
[ Matches a signed integral value. ]
[ `int` ]
[]]
[ To specify a base/radix of `N`, use _i_`.base<N>()`. To specify exactly `D` digits, use _i_`.digits<D>()`. To specify a minimum of `LO` digits and a maximum of `HI` digits, use _i_`.digits<LO, HI>()`. These calls can be chained, as in _i_`.base<2>().digits<8>()`. ]]
[[ `_i_(arg0)` ]
[ Matches exactly the signed integral value `_RES_np_(arg0)`. ]
@@ -505,7 +505,7 @@ attribute type is `char32_t`:
static_assert(std::is_same_v<decltype(result), std::optional<char32_t>>));
The good news is that usually you don't parse characters individually. When
you parse with _ch_, you usually parse repetition of then, which will produce
you parse with _ch_, you usually parse repetition of them, which will produce
a _std_str_, regardless of whether you're in Unicode parsing mode or not. If
you do need to parse individual characters, and want to lock down their
attribute type, you can use _cp_ and/or _cu_ to enforce a non-polymorphic
@@ -551,7 +551,7 @@ tables below:
[[`p1 || p2`] [`_bp_tup_<_ATTR_np_(p1), _ATTR_np_(p2)>`]]
[[`p1 || p2 || p3`] [`_bp_tup_<_ATTR_np_(p1), _ATTR_np_(p2), _ATTR_np_(p3)>`]]
[[`p1 % p2`] [`std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p1)>`]]
[[`p1 % p2`] [`std::string` if `_ATTR_np_(p1)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p1)>`]]
[[`p[a]`] [None.]]

View File

@@ -584,7 +584,7 @@ things:
* This rule object itself is called `doubles`.
* We've given `doubles` the diagnstic text `"doubles"` so that _Parser_ knows
* We've given `doubles` the diagnostic text `"doubles"` so that _Parser_ knows
how to refer to it when producing a trace of the parser during debugging.
Ok, so if `doubles` is a parser, what does it do? We define the rule's
@@ -828,7 +828,7 @@ the same character must be used on both sides.
[quoted_string_example_4]
Another common thing to do in a quoted string parser is to recognize escape
sequences. If you have simple escape sequencecs that do not require any real
sequences. If you have simple escape sequences that do not require any real
parsing, like say the simple escape sequences from C++, you can provide a
_symbols_ object as well. The template parameter `T` to _symbols_t_ must be
`char` or `char32_t`. You don't need to include the escaped backslash or the
@@ -837,10 +837,10 @@ escaped quote character, since those always work.
[quoted_string_example_5]
Additionally, with each of the forms shown above, you can optionally provide a
parser as a final argument, to will be used to parse each character inside the
quotes. You have to provide an actual full parser here; you cannot provide a
character or string literal. If you do not provide a character parser, _ch_
is used.
parser as a final argument, which will be used to parse each character inside
the quotes. You have to provide an actual full parser here; you cannot
provide a character or string literal. If you do not provide a character
parser, _ch_ is used.
[quoted_string_example_6]
@@ -1159,7 +1159,7 @@ erase and clear for the current parse, and another that applies only to
subsequent parses. The full set of operations can be found in the _symbols_
API docs.
[mpte There are two versions of each of the _symbols_ `*_for_next_parse()`
[note There are two versions of each of the _symbols_ `*_for_next_parse()`
functions _emdash_ one that takes a context, and one that does not. The one
with the context is meant to be used within a semantic action. The one
without the context is for use outside of any parse.]
@@ -1248,22 +1248,25 @@ these parsers is in a subsequent section. The attributes are repeated here so
you can see all the properties of the parsers in one place.]
If you have an integral type `IntType` that is not covered by any of the
_Parser_ parsers, you can use a more verbose declaration to declare a parser
for `IntType`. If `IntType` were unsigned, you would use `uint_parser`. If
it were signed, you would use `int_parser`. For example:
_Parser_ parsers, you can explicitly specify a base/radix or bounds on the
number of digits. You do this by calling the `base()` and `digits()` member
functions on an existing parser of the right integral type. So if `IntType`
were unsigned, you would use `uint_`. If it were signed, you would use
`int_`. For example:
constexpr parser_interface<int_parser<IntType>> hex_int;
constexpr auto hex_int = bp::uint_.base<16>();
`uint_parser` and `int_parser` accept three more non-type template parameters
after the type parameter. They are `Radix`, `MinDigits`, and `MaxDigits`.
`Radix` defaults to `10`, `MinDigits` to `1`, and `MaxDigits` to `-1`, which
is a sentinel value meaning that there is no max number of digits.
You simply chain together the constraints you want to use, like
`.base<16>().digits<2>()` or `.digits<4>().base<8>()`.
So, if you wanted to parse exactly eight hexadecimal digits in a row in order
to recognize Unicode character literals like C++ has (e.g. `\Udeadbeef`), you
could use this parser for the digits at the end:
constexpr parser_interface<uint_parser<unsigned int, 16, 8, 8>> hex_int;
constexpr auto hex_4_def = bp::uint_.base<16>().digits<8>();
If you want to specify an acceptable range of digits, use `.digits<LO, HI>()`.
Both `HI` and `LO` are inclusive bounds.
[endsect]
@@ -1283,7 +1286,7 @@ parsers; we won't say much about them here.
[heading Interaction with sequence, alternative, and permutation parsers]
Sequence, alternative, and permutation parsers do not nest in most cases.
(Let's consider just sequence parsers to keep thinkgs simple, but most of this
(Let's consider just sequence parsers to keep things simple, but most of this
logic applies to alternative parsers as well.) `a >> b >> c` is the same as
`(a >> b) >> c` and `a >> (b >> c)`, and they are each represented by a single
_seq_p_ with three subparsers, `a`, `b`, and `c`. However, if something
@@ -1692,7 +1695,7 @@ the following steps applied:
wrapped in a `std::optional`, like `std::optional<std::variant</*...*/>>`;
* duplicates in the `std::variant` template parameters `<T1, T2, ... Tn>` are
removed; every type that appears does so exacly once;
removed; every type that appears does so exactly once;
* if the attribute is `std::variant<T>` or `std::optional<std::variant<T>>`,
the attribute becomes instead `T` or `std::optional<T>`, respectively; and
@@ -2490,8 +2493,8 @@ the earlier expectation:
]
Not nearly as nice. The problem is that the expectation is on `(value %
',')`. So, even thought we gave `value` reasonable dianostic text, we put the
text on the wrong thing. We can introduce a new rule to put the diagnstic
',')`. So, even though we gave `value` reasonable diagnostic text, we put the
text on the wrong thing. We can introduce a new rule to put the diagnostic
text in the right place.
namespace bp = boost::parser;
@@ -2577,7 +2580,7 @@ Also, consider this rule:
bp::rule<struct ints_tag, std::vector<int>> ints = "ints";
auto const ints_def = bp::int_ >> ints | bp::eps;
What is the default attribute type for ints_def? It sure looks like
What is the default attribute type for `ints_def`? It sure looks like
`std::optional<std::vector<int>>`. Inside the evaluation of `ints`, _Parser_
must evaluate `ints_def`, and then produce a `std::vector<int>` _emdash_ the
return type of `ints` _emdash_ from it. How? How do you turn a
@@ -2585,7 +2588,7 @@ return type of `ints` _emdash_ from it. How? How do you turn a
seems obvious, but the metaprogramming that properly handles this simple
example and the general case is certainly beyond me.
_Parser_ has a specific semantic for what consitutes a recursive rule. Each
_Parser_ has a specific semantic for what constitutes a recursive rule. Each
rule has a tag type associated with it, and if _Parser_ enters a rule with a
certain tag `Tag`, and the currently-evaluating rule (if there is one) also
has the tag `Tag`, then the rule instance being entered is considered to be a
@@ -2659,7 +2662,7 @@ semantics, is a lot easier to read, and is a lot less code.]
The _r_ template takes another template parameter we have not discussed yet.
You can pass a third parameter `LocalState` to _r_, which will be default
csontructed by the _r_, and made available within semantic actions used in the
constructed by the _r_, and made available within semantic actions used in the
rule as `_locals_np_(ctx)`. This gives your rule some local state, if it
needs it. The type of `LocalState` can be anything regular. It could be a
single value, a struct containing multiple values, or a tuple, among others.
@@ -3399,9 +3402,9 @@ _w_eh_ (see _p_api_). If you do not set one, _default_eh_ will be used.
[heading How diagnostics are generated]
_Parser_ only generates error messages like the ones in this page at failed
expectation points, like `a > b`, where you have successfully parsed `a`, but
then cannot successfully parse `b`. This may seem limited to you. It's
actually the best that we can do.
expectation points (like `a > b`, where you have successfully parsed `a`, but
then cannot successfully parse `b`), and at an unexpected end of input. This
may seem limited to you. It's actually the best that we can do.
In order for error handling to happen other than at expectation points, we
have to know that there is no further processing that might take place. This
@@ -3409,21 +3412,26 @@ is true because _Parser_ has `P1 | P2 | ... | Pn` parsers ("`or_parser`s").
If any one of these parsers `Pi` fails to match, it is not allowed to fail the
parse _emdash_ the next one (`Pi+1`) might match. If we get to the end of the
alternatives of the or_parser and `Pn` fails, we still cannot fail the
top-level parse, because the `or_parser` might be a subparser within a parent
`or_parser`.
top-level parse, because this `or_parser` might be a subparser within a parent
`or_parser`. The only exception to this is when: we have finished the
top-level parse; the top-level parse is *not* a prefix parse; and there is
still a part of the input range that is left over. In that case, there is an
implicit expectation that the end of the parse and the end of input are the
same location, and this implicit expectation has just been violated.
Ok, so what might we do? Perhaps we could at least indicate when we ran into
end-of-input. But we cannot, for exactly the same reason already stated. For
any parser `P`, reaching end-of-input is a failure for `P`, but not
necessarily for the whole parse.
Note that we cannot fail the top-level parse when we run into end-of-input.
We cannot for exactly the same reason already stated. For any parser `P`,
reaching end-of-input is a failure for `P`, but not necessarily for the whole
parse.
Perhaps we could record the farthest point ever reached during the parse, and
report that at the top level, if the top level parser fails. That would be
little help without knowing which parser was active when we reached that
point. This would require some sort of repeated memory allocation, since in
_Parser_ the progress point of the parser is stored exclusively on the stack
_emdash_ by the time we fail the top-level parse, all those far-reaching stack
frames are long gone. Not the best.
Ok, so what other kinds of error reporting might we do? Perhaps we could
record the farthest point ever reached during the parse, and report that at
the top level, if the top level parser fails. That would be little help
without knowing which parser was active when we reached that point. This
would require some sort of repeated memory allocation, since in _Parser_ the
progress point of the parser is stored exclusively on the stack _emdash_ by
the time we fail the top-level parse, all those far-reaching stack frames are
long gone. Not the best.
Worse still, knowing how far you got in the parse and which parser was active
is not very useful. Consider this.
@@ -3440,15 +3448,16 @@ Was the error in the input putting the `'a'` at the beginning or putting the
failed, and never mention `c_b`, you are potentially just steering them in the
wrong direction.
All error messages must come from failed expectation points. Consider parsing
JSON. If you open a list with `'['`, you know that you're parsing a list, and
if the list is ill-formed, you'll get an error message saying so. If you open
an object with `'{'`, the same thing is possible _emdash_ when missing the
matching `'}'`, you can tell the user, "That's not an object", and this is
useful feedback. The same thing with a partially parsed number, etc. If the
JSON parser does not build in expectations like matched braces and brackets,
how can _Parser_ know that a missing `'}'` is really a problem, and that no
later parser will match the input even without the `'}'`?
All error messages must come from failed expectation points (or unexpected end
of input). Consider parsing JSON. If you open a list with `'['`, you know
that you're parsing a list, and if the list is ill-formed, you'll get an error
message saying so. If you open an object with `'{'`, the same thing is
possible _emdash_ when missing the matching `'}'`, you can tell the user,
"That's not an object", and this is useful feedback. The same thing with a
partially parsed number, etc. If the JSON parser does not build in
expectations like matched braces and brackets, how can _Parser_ know that a
missing `'}'` is really a problem, and that no later parser will match the
input even without the `'}'`?
[important The bottom line is that you should build expectation points into
your parsers using `operator>` as much as possible.]
@@ -3551,7 +3560,7 @@ We just define a `logging_error_handler`, and pass it by reference to _w_eh_,
which decorates the top-level parser with the error handler. We *could not*
have written `bp::with_error_handler(parser,
logging_error_handler("parse.log"))`, because _w_eh_ does not accept rvalues.
This is becuse the error handler eventually goes into the parse context. The
This is because the error handler eventually goes into the parse context. The
parse context only stores pointers and iterators, keeping it cheap to copy.
If we run the example and give it the input `"1,"`, this shows up in the log
@@ -3599,7 +3608,7 @@ to `_trace_::off`.
If we trace a substantial parser, we will see a *lot* of output. Each code
point of the input must be considered, one at a time, to see if a certain rule
matches. An an example, let's trace a parse using the JSON parser from
matches. As an example, let's trace a parse using the JSON parser from
_ex_json_. The input is `"null"`. `null` is one of the types that a
Javascript value can have; the top-level parser in the JSON parser example is:
@@ -3786,7 +3795,7 @@ _Parser_ seldom allocates memory. The exceptions to this are:
which implies allocation. You can avoid this allocation by explicitly using
a different sequence container for the attribute that does not allocate.
`boost::container::static_vector` or C++26's `std::inplace_vector` may be
useful as such replacements.
useful for such replacements.
With the exception of allocating the name of the parser that was expected in a
failed expectation situation, _Parser_ does not allocate unless you
@@ -3863,9 +3872,9 @@ Some things to note:
want to know how to fix their input. For either rule, the fix is the same:
put a hexadecimal escape sequence there.
- `single_escaped_char` has a terrible-looking name. However, it's not really
used as a name anywhere per se. In error messages, it works nicely, though.
The error will be "Expected '"', '\', '/', 'b', 'f', 'n', 'r', or 't' here",
- `single_escaped_char` has a terrible-looking name. However, it's not
actually used as a name. In error messages, it works nicely, though. The
error will be "Expected '"', '\', '/', 'b', 'f', 'n', 'r', or 't' here",
which is pretty helpful.
[heading Have a simple test that you can run to find ill-formed-code-as-asserts]

View File

@@ -126,7 +126,7 @@ namespace json {
}
};
bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def;
auto const hex_4_def = boost::parser::uint_.base<16>().digits<4>();
auto const escape_seq_def = "\\u" > hex_4;

View File

@@ -151,12 +151,10 @@ namespace json {
}
};
// This is the verbose form of declaration for the integer and unsigned
// integer parsers int_parser and uint_parser. In this case, we don't
// want to use boost::parser::hex directly, since it has a variable number
// of digits. We want to match exactly 4 digits, and this is how we
// declare a hexadecimal parser that matches exactly 4.
bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def;
// We don't want to use boost::parser::hex directly, since it has a
// variable number of digits. We want to match exactly 4 digits, and this
// is how we declare a hexadecimal parser that matches exactly 4.
auto const hex_4_def = boost::parser::uint_.base<16>().digits<4>();
// We use > here instead of >>, because once we see \u, we know that
// exactly four hex digits must follow -- no other production rule starts

View File

@@ -88,10 +88,12 @@ namespace boost::parser::detail { namespace text {
{
V base_ = V();
template<bool Const>
class iterator;
// HACK: SentType is here to work around irritating big-3
// implementation inconsistencies.
template<bool Const>
class sentinel;
template<bool Const, typename SentType = sentinel<Const>>
class iterator;
public:
constexpr project_view()
@@ -140,7 +142,7 @@ namespace boost::parser::detail { namespace text {
#else
template<typename V, typename F>
#endif
template<bool Const>
template<bool Const, typename SentType>
class project_view<V, F>::iterator
: public boost::parser::detail::stl_interfaces::proxy_iterator_interface<
iterator<Const>,
@@ -161,7 +163,7 @@ namespace boost::parser::detail { namespace text {
decltype(detail::function_for_tag<F>(0))
#endif
;
using sentinel = project_view<V, F>::sentinel<Const>;
using sentinel = SentType;
friend boost::parser::detail::stl_interfaces::access;
iterator_type & base_reference() noexcept { return it_; }
@@ -169,7 +171,7 @@ namespace boost::parser::detail { namespace text {
iterator_type it_ = iterator_type();
friend project_view<V, F>::sentinel<Const>;
friend project_view<V, F>::template sentinel<Const>;
template<bool OtherConst>
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS

View File

@@ -1548,7 +1548,7 @@ namespace boost { namespace parser {
{
std::optional<T> retval;
if (success)
retval = x;
retval = std::move(x);
return retval;
}
@@ -2715,20 +2715,28 @@ namespace boost { namespace parser {
}
}
template<typename I, typename S, typename T>
std::optional<T>
if_full_parse(I & first, S last, std::optional<T> retval)
template<typename I, typename S, typename ErrorHandler, typename T>
T if_full_parse(
I initial_first,
I & first,
S last,
ErrorHandler const & error_handler,
T retval)
{
if (first != last)
retval = std::nullopt;
return retval;
}
template<typename I, typename S>
bool if_full_parse(I & first, S last, bool retval)
{
if (first != last)
retval = false;
return retval;
if (first != last) {
if (retval && error_handler(
initial_first,
last,
parse_error<I>(first, "end of input")) ==
error_handler_result::rethrow) {
throw;
}
if constexpr (std::is_same_v<T, bool>)
retval = false;
else
retval = std::nullopt;
}
return std::move(retval);
}
// The notion of compatibility is that, given a parser with the
@@ -2801,6 +2809,8 @@ namespace boost { namespace parser {
{
if constexpr (is_nope_v<ParserAttr>) {
return nope{};
} else if constexpr (is_optional_v<ParserAttr>) {
return ParserAttr{};
} else {
using value_type = range_value_t<GivenContainerAttr>;
return std::conditional_t<
@@ -5401,9 +5411,28 @@ namespace boost { namespace parser {
if constexpr (CanUseCallbacks && Context::use_callbacks) {
call(first, last, context, skip, flags, success);
} else {
auto attr = call(first, last, context, skip, flags, success);
if (success)
detail::assign(retval, std::move(attr));
locals_type locals = detail::make_locals<locals_type>(context);
auto params = detail::resolve_rule_params(context, params_);
tag_type * const tag_ptr = nullptr;
auto const rule_context = detail::make_rule_context(
context, tag_ptr, retval, locals, params);
[[maybe_unused]] auto _ = detail::scoped_trace(
*this, first, last, rule_context, flags, retval);
bool dont_assign = false;
parse_rule(
tag_ptr,
first,
last,
rule_context,
skip,
flags,
success,
dont_assign,
retval);
if (!success || dont_assign)
retval = Attribute_();
}
}
@@ -5413,6 +5442,29 @@ namespace boost { namespace parser {
#endif
namespace detail {
// Detection helpers for the `base<>()` and `digits<>()` member function
// templates.  Each `*_expr` alias names the type of a sample call on a `T`
// value; the alias is only well-formed when `T` has such a member.  Each
// matching `*_v` constant feeds that alias to `is_detected_v` to turn
// "is the call well-formed?" into a compile-time bool.

// Type of `t.base<2>()` for a `T` value `t`.  The radix 2 is just a sample
// argument used to form the call.
template<typename T>
using base_member_function_template_expr =
decltype(std::declval<T>().template base<2>());
// True iff `T` has a `base<int>()` member function template callable as
// above.
template<typename T>
constexpr bool has_base_member_function_template_v =
is_detected_v<base_member_function_template_expr, T>;
// Type of `t.digits<1>()`, the single-argument form of `digits`.
template<typename T>
using has_digits1_member_function_template_expr =
decltype(std::declval<T>().template digits<1>());
// True iff `T` has the single-argument `digits<int>()` form.
template<typename T>
constexpr bool has_digits1_member_function_template_v =
is_detected_v<has_digits1_member_function_template_expr, T>;
// Type of `t.digits<1, 2>()`, the two-argument (min, max) form of `digits`.
template<typename T>
using has_digits2_member_function_template_expr =
decltype(std::declval<T>().template digits<1, 2>());
// True iff `T` has the two-argument `digits<int, int>()` form.
template<typename T>
constexpr bool has_digits2_member_function_template_v =
is_detected_v<has_digits2_member_function_template_expr, T>;
}
// Parser interface.
template<typename Parser, typename GlobalState, typename ErrorHandler>
@@ -5752,7 +5804,7 @@ namespace boost { namespace parser {
return parser_.call(first, last, context, skip, flags, success);
}
/** Applies `parser_`, assiging the parsed attribute, if any, to
/** Applies `parser_`, assigning the parsed attribute, if any, to
`attr`, unless the attribute is reported via callback. */
template<
typename Iter,
@@ -5772,6 +5824,60 @@ namespace boost { namespace parser {
parser_.call(first, last, context, skip, flags, success, attr);
}
/** Returns a new `parser_interface` constructed from
`parser_.base<Radix2>()`. Note that this only works for integral
numeric parsers like `int_` and `uint_`. */
template<int Radix2>
constexpr auto base() const noexcept
{
if constexpr (detail::has_base_member_function_template_v<
parser_type>) {
return parser::parser_interface{
parser_.template base<Radix2>()};
} else {
static_assert(
detail::has_base_member_function_template_v<parser_type>,
"Only certain parsers have a .base<>() member function. "
"This is not one of them.");
}
}
/** Returns a new `parser_interface` constructed from
    `parser_.digits<Digits>()`.  Note that this only works for
    integral numeric parsers like `int_` and `uint_`; for a
    `parser_type` without a single-argument `digits<>()` member
    function template, using this function fails a `static_assert`. */
template<int Digits>
constexpr auto digits() const noexcept
{
    if constexpr (detail::has_digits1_member_function_template_v<
                      parser_type>) {
        return parser::parser_interface{
            parser_.template digits<Digits>()};
    } else {
        // Reached only when parser_type lacks digits<D>(); the message
        // names the member function that is actually missing.
        static_assert(
            detail::has_digits1_member_function_template_v<parser_type>,
            "Only certain parsers have a .digits<>() member function. "
            "This is not one of them.");
    }
}
/** Returns a new `parser_interface` constructed from
    `parser_.digits<MinDigits2, MaxDigits2>()`.  Note that this only
    works for integral numeric parsers like `int_` and `uint_`; for a
    `parser_type` without a two-argument `digits<>()` member function
    template, using this function fails a `static_assert`. */
template<int MinDigits2, int MaxDigits2>
constexpr auto digits() const noexcept
{
    if constexpr (detail::has_digits2_member_function_template_v<
                      parser_type>) {
        return parser::parser_interface{
            parser_.template digits<MinDigits2, MaxDigits2>()};
    } else {
        // Reached only when parser_type lacks digits<LO, HI>(); the
        // message names the member function that is actually missing.
        static_assert(
            detail::has_digits2_member_function_template_v<parser_type>,
            "Only certain parsers have a .digits<>() member function. "
            "This is not one of them.");
    }
}
parser_type parser_;
global_state_type globals_;
error_handler_type error_handler_;
@@ -6063,7 +6169,7 @@ namespace boost { namespace parser {
SkipParser const & skip, \
boost::parser::detail::flags flags, \
bool & success, \
bool & dont_assign, \
bool & /*dont_assign*/, \
Attribute & retval) \
{ \
auto const & parser = BOOST_PARSER_PP_CAT(rule_name_, _def); \
@@ -7830,7 +7936,7 @@ namespace boost { namespace parser {
lower;
/** The upper case character parser. Matches the full set of Unicode
lower case code points (class "Lu"). */
upper case code points (class "Lu"). */
inline BOOST_PARSER_ALGO_CONSTEXPR
parser_interface<char_set_parser<detail::upper_case_chars>>
upper;
@@ -7918,7 +8024,11 @@ namespace boost { namespace parser {
typename Expected>
struct uint_parser
{
static_assert(2 <= Radix && Radix <= 36, "Unsupported radix.");
static_assert(
Radix == 2 || Radix == 8 || Radix == 10 || Radix == 16,
"Unsupported radix.");
static_assert(1 <= MinDigits);
static_assert(MaxDigits == -1 || MinDigits <= MaxDigits);
constexpr uint_parser() {}
explicit constexpr uint_parser(Expected expected) : expected_(expected)
@@ -7984,6 +8094,33 @@ namespace boost { namespace parser {
return parser_interface{parser_t{expected}};
}
/** Returns a `uint_parser` identical to `*this`, except that it
parses digits as base-`Radix2` instead of base-`Radix`. */
template<int Radix2>
constexpr auto base() const noexcept
{
return uint_parser<T, Radix2, MinDigits, MaxDigits, Expected>{
expected_};
}
/** Returns a `uint_parser` identical to `*this`, except that it only
accepts numbers exactly `Digits` digits. */
template<int Digits>
constexpr auto digits() const noexcept
{
return uint_parser<T, Radix, Digits, Digits, Expected>{expected_};
}
/** Returns a `uint_parser` identical to `*this`, except that it
only accepts numbers `D` digits long, where `D` is in
[`MinDigits2`, MaxDigits2`]. */
template<int MinDigits2, int MaxDigits2>
constexpr auto digits() const noexcept
{
return uint_parser<T, Radix, MinDigits2, MaxDigits2, Expected>{
expected_};
}
Expected expected_;
};
@@ -8031,6 +8168,8 @@ namespace boost { namespace parser {
static_assert(
Radix == 2 || Radix == 8 || Radix == 10 || Radix == 16,
"Unsupported radix.");
static_assert(1 <= MinDigits);
static_assert(MaxDigits == -1 || MinDigits <= MaxDigits);
constexpr int_parser() {}
explicit constexpr int_parser(Expected expected) : expected_(expected)
@@ -8096,6 +8235,33 @@ namespace boost { namespace parser {
return parser_interface{parser_t{expected}};
}
/** Returns an `int_parser` identical to `*this`, except that it
parses digits as base-`Radix2` instead of base-`Radix`. */
template<int Radix2>
constexpr auto base() const noexcept
{
return int_parser<T, Radix2, MinDigits, MaxDigits, Expected>{
expected_};
}
/** Returns an `int_parser` identical to `*this`, except that it only
accepts numbers exactly `Digits` digits. */
template<int Digits>
constexpr auto digits() const noexcept
{
return int_parser<T, Radix, Digits, Digits, Expected>{expected_};
}
/** Returns an `int_parser` identical to `*this`, except that it
only accepts numbers `D` digits long, where `D` is in
[`MinDigits2`, MaxDigits2`]. */
template<int MinDigits2, int MaxDigits2>
constexpr auto digits() const noexcept
{
return int_parser<T, Radix, MinDigits2, MaxDigits2, Expected>{
expected_};
}
Expected expected_;
};
@@ -8817,9 +8983,12 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return reset = detail::if_full_parse(
initial_first,
first,
last,
parser.error_handler_,
parser::prefix_parse(first, last, parser, attr, trace_mode));
}
@@ -8922,8 +9091,13 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return detail::if_full_parse(
first, last, parser::prefix_parse(first, last, parser, trace_mode));
initial_first,
first,
last,
parser.error_handler_,
parser::prefix_parse(first, last, parser, trace_mode));
}
/** Parses `[first, last)` using `parser`, skipping all input recognized
@@ -9058,9 +9232,12 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return reset = detail::if_full_parse(
initial_first,
first,
last,
parser.error_handler_,
parser::prefix_parse(
first, last, parser, skip, attr, trace_mode));
}
@@ -9169,9 +9346,12 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return detail::if_full_parse(
initial_first,
first,
last,
parser.error_handler_,
parser::prefix_parse(first, last, parser, skip, trace_mode));
}
@@ -9287,9 +9467,12 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return detail::if_full_parse(
initial_first,
first,
last,
parser.error_handler_,
parser::callback_prefix_parse(first, last, parser, callbacks));
}
@@ -9423,9 +9606,12 @@ namespace boost { namespace parser {
auto r_ = detail::make_input_subrange(r);
auto first = r_.begin();
auto const last = r_.end();
auto const initial_first = first;
return detail::if_full_parse(
initial_first,
first,
last,
parser.error_handler_,
parser::callback_prefix_parse(
first, last, parser, skip, callbacks, trace_mode));
}

View File

@@ -404,8 +404,8 @@ namespace boost { namespace parser {
and at most `MaxDigits`, producing an attribute of type `T`. Fails on
any other input. The parse will also fail if `Expected` is anything
but `detail::nope` (which it is by default), and the produced
attribute is not equal to `expected_`. `Radix` must be in `[2,
36]`. */
attribute is not equal to `expected_`. `Radix` must be one of `2`,
`8`, `10`, or `16`. */
template<
typename T,
int Radix = 10,

View File

@@ -4,5 +4,6 @@
"authors": [ "T. Zachary Laine" ],
"maintainers": [ "Zach Laine <whatwasthataddress -at- gmail.com>" ],
"description": "A parser combinator library.",
"category": [ "Parsing" ]
"category": [ "Parsing" ],
"cxxstd": "17"
}

View File

@@ -8,6 +8,7 @@ import testing ;
project
: requirements <library>/boost/charconv//boost_charconv
<library>/boost/parser//boost_parser
;
compile compile_all_t.cpp ;

View File

@@ -258,6 +258,92 @@ void github_issue_209()
std::end(bp::detail::char_set<detail::upper_case_chars>::chars)));
}
// Regression test for GitHub issue 223.  The same parser, a zero-or-more
// repetition of ('x' | char_('y')), is run over "xy" twice: once writing into
// a caller-supplied std::vector<char>, and once using the overload of parse()
// whose result is returned to the caller.
void github_issue_223()
{
namespace bp = boost::parser;
// failing case (as reported in the issue): results are written into the
// caller-supplied vector v.
{
std::vector<char> v;
const auto parser = *('x' | bp::char_('y'));
bp::parse("xy", parser, bp::ws, v);
BOOST_TEST(v.size() == 1);
BOOST_TEST(v == std::vector<char>({'y'}));
// Issue reporter's observation: the checks above failed because v ended up
// with two elements, '\0' and 'y', instead of just 'y'.
}
// working case: the parsed result is returned instead of being written
// through an out-param.
{
const auto parser = *('x' | bp::char_('y'));
const auto result = bp::parse("xy", parser, bp::ws);
// NOTE(review): result and its first element are dereferenced below without
// first checking that the parse succeeded -- confirm this is acceptable for a
// test that is expected to pass.
BOOST_TEST(result->size() == 1);
BOOST_TEST(*(*result)[0] == 'y');
// success, the vector has only one 'y' element
}
}
// Rules used by github_issue_248().  `working` and `failing` have the same
// attribute type (std::vector<int>) and differ only in how the
// comma-separated int list is expressed: `working` spells it inline, while
// `failing` goes through the separate `list` rule.
namespace github_issue_248_ {
namespace bp = boost::parser;
// Rule declarations.  The string passed to each rule is presumably its
// diagnostic name -- TODO confirm against the bp::rule documentation.
static constexpr bp::rule<struct symbol, int> symbol = "//";
static constexpr bp::rule<struct vector, std::vector<int>> list =
"<int>(,<int>)*";
static constexpr bp::rule<struct working, std::vector<int>> working =
"working";
static constexpr bp::rule<struct failing, std::vector<int>> failing =
"failing";
// Rule definitions.
// symbol: a symbol table with the single entry "//" -> 0.
static auto const symbol_def = bp::symbols<int>{{"//", 0}};
// list: one or more ints separated by ','.
static constexpr auto list_def = bp::int_ % ',';
// working: optional symbol followed by the int list, written inline.
static constexpr auto working_def = -symbol >> (bp::int_ % ',');
// failing: optional symbol followed by the int list, via the `list` rule.
static constexpr auto failing_def = -symbol >> list;
BOOST_PARSER_DEFINE_RULES(symbol, list, working, failing);
}
// Regression test for GitHub issue 248.  Parses "//1,2,3" with both the
// `working` and the `failing` rule from namespace github_issue_248_ and
// expects each to yield {0, 1, 2, 3}.  On a mismatch, the contents actually
// produced are printed to std::cout to aid debugging.
void github_issue_248()
{
    namespace bp = boost::parser;
    using namespace github_issue_248_;

    {
        auto const result = bp::parse("//1,2,3", working, bp::ws);
        auto const expected = std::vector<int>{0, 1, 2, 3};
        BOOST_TEST(result.has_value());
        // BOOST_TEST does not stop execution on failure, so only look inside
        // *result when the parse actually produced a value; dereferencing an
        // empty result would be undefined behavior.
        if (result.has_value()) {
            bool const equal = std::equal(
                result->begin(),
                result->end(),
                expected.begin(),
                expected.end());
            BOOST_TEST(equal);
            if (!equal) {
                std::cout << "contents of *result:\n";
                for (auto x : *result) {
                    std::cout << x << '\n';
                }
                std::cout << '\n';
            }
        }
    }

    {
        auto const result = bp::parse("//1,2,3", failing, bp::ws);
        auto const expected = std::vector<int>{0, 1, 2, 3};
        BOOST_TEST(result.has_value());
        // Same guard as above: skip the comparison if the parse failed.
        if (result.has_value()) {
            bool const equal = std::equal(
                result->begin(),
                result->end(),
                expected.begin(),
                expected.end());
            BOOST_TEST(equal);
            if (!equal) {
                std::cout << "contents of *result:\n";
                for (auto x : *result) {
                    std::cout << x << '\n';
                }
                std::cout << '\n';
            }
        }
    }
}
int main()
{
@@ -268,5 +354,7 @@ int main()
github_issue_90();
github_issue_125();
github_issue_209();
github_issue_223();
github_issue_248();
return boost::report_errors();
}

View File

@@ -292,6 +292,15 @@ int main()
}
BOOST_TEST(parse(str, parser_1));
BOOST_TEST(!parse(str, parser_2));
{
BOOST_TEST(!parse(str, char_));
std::ostringstream err, warn;
stream_error_handler eh("", err, warn);
BOOST_TEST(!parse(str, with_error_handler(char_, eh)));
BOOST_TEST(
err.str() ==
"1:1: error: Expected end of input here:\nab\n ^\n");
}
}
{
std::string str = "ab";

View File

@@ -323,8 +323,7 @@ int main()
assert(result1);
std::cout << *result1 << "\n"; // Prints: some text
auto result2 =
bp::parse("\"some \\\"text\\\"\"", bp::quoted_string, bp::ws);
auto result2 = bp::parse(R"("some \"text\"")", bp::quoted_string, bp::ws);
assert(result2);
std::cout << *result2 << "\n"; // Prints: some "text"
//]

View File

@@ -316,7 +316,7 @@ int main()
add_parser >> roman_numerals >> next_delete_parser >>
roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == std::tuple(100, 100));
BOOST_TEST(*result == detail::hl::make_tuple(100, 100));
}
{