mirror of
https://github.com/boostorg/parser.git
synced 2026-01-20 04:42:22 +00:00
Compare commits
21 Commits
boost-1.88
...
boost-1.89
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c674e94c3d | ||
|
|
84ee288b02 | ||
|
|
39faa9ddbe | ||
|
|
b2927abc6c | ||
|
|
5d6d2f7b84 | ||
|
|
fd6c56df1b | ||
|
|
af41e6a7c2 | ||
|
|
0b93a586f1 | ||
|
|
ed9a06123b | ||
|
|
8ff46f394a | ||
|
|
8c9ad7bdb3 | ||
|
|
d8abe8f29e | ||
|
|
810adb43f6 | ||
|
|
5788fb6967 | ||
|
|
ec7df8a0af | ||
|
|
a93a1d2647 | ||
|
|
927f35f115 | ||
|
|
87617fdec0 | ||
|
|
ead639e630 | ||
|
|
a3ca1193b2 | ||
|
|
07153117ff |
24
build.jam
Normal file
24
build.jam
Normal file
@@ -0,0 +1,24 @@
|
||||
# Copyright René Ferdinand Rivera Morell 2025
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# (See accompanying file LICENSE_1_0.txt or copy at
|
||||
# http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
require-b2 5.2 ;
|
||||
|
||||
constant boost_dependencies :
|
||||
/boost/assert//boost_assert
|
||||
/boost/charconv//boost_charconv
|
||||
/boost/hana//boost_hana
|
||||
/boost/type_index//boost_type_index ;
|
||||
|
||||
project /boost/parser
|
||||
;
|
||||
|
||||
explicit
|
||||
[ alias boost_parser : : :
|
||||
: <library>$(boost_dependencies) <include>include ]
|
||||
[ alias all : boost_parser test ]
|
||||
;
|
||||
|
||||
call-if : boost-library parser
|
||||
;
|
||||
@@ -39,7 +39,7 @@ rule run_doxygen ( files * : name : expand ? )
|
||||
|
||||
}
|
||||
|
||||
run_doxygen [ glob $(here)/../../../boost/parser/*.hpp : $(here)/../../../boost/parser/concepts.hpp ] : "Headers" ;
|
||||
run_doxygen [ glob $(here)/../include/boost/parser/*.hpp : $(here)/../include/boost/parser/concepts.hpp ] : "Headers" ;
|
||||
|
||||
install images_standalone : [ glob *.png ] : <location>html/parser/img ;
|
||||
explicit images_standalone ;
|
||||
|
||||
@@ -325,4 +325,24 @@ always equal to `A()` if the parser fails. It is equal to whatever the parser
|
||||
sets it to _emdash_ or its previous value, if the parser does not mutate it
|
||||
_emdash_ if the parse succeeds.
|
||||
|
||||
[heading There are no _Spirit_-style character class parsers]
|
||||
|
||||
_Spirit_ has these character class parsers that recognize the same set of
|
||||
characters as the C standard library's character class functions. For
|
||||
instance, _Spirit_'s `alnum` recognizes the characters recognized by
|
||||
`std::isalnum()`, its `punct` recognizes the characters recognized by
|
||||
`std::ispunct()`, etc.
|
||||
|
||||
The problem with this is that those `std::is*()` functions are badly broken.
|
||||
They do not even work correctly for ASCII values. This is because they use
|
||||
the C standard library's locale mechanism, which can be set to anything the
|
||||
current platform supports, and can be set by any code anywhere in your
|
||||
program; the locale is mutable global state. So, even if you use the default
|
||||
"C" locale in your program, if you link against a library that sets the locale
|
||||
to something that breaks ASCII character recognition (an EBCDIC locale, for
|
||||
instance), your program is now incorrect, regardless of the code you wrote.
|
||||
|
||||
For this reason, I firmly believe that no one, anywhere, should use those C
|
||||
functions in production code, and I am not supporting their use via _Parser_.
|
||||
|
||||
[endsect]
|
||||
|
||||
@@ -230,7 +230,7 @@ the input they match unless otherwise stated in the table below.]
|
||||
[[ _ui_ ]
|
||||
[ Matches an unsigned integral value. ]
|
||||
[ `unsigned int` ]
|
||||
[]]
|
||||
[ To specify a base/radix of `N`, use _ui_`.base<N>()`. To specify exactly `D` digits, use _ui_`.digits<D>()`. To specify a minimum of `LO` digits and a maximum of `HI` digits, use _ui_`.digits<LO, HI>()`. These calls can be chained, as in _ui_`.base<2>().digits<8>()`. ]]
|
||||
|
||||
[[ `_ui_(arg0)` ]
|
||||
[ Matches exactly the unsigned integral value `_RES_np_(arg0)`. ]
|
||||
@@ -270,7 +270,7 @@ the input they match unless otherwise stated in the table below.]
|
||||
[[ _i_ ]
|
||||
[ Matches a signed integral value. ]
|
||||
[ `int` ]
|
||||
[]]
|
||||
[ To specify a base/radix of `N`, use _i_`.base<N>()`. To specify exactly `D` digits, use _i_`.digits<D>()`. To specify a minimum of `LO` digits and a maximum of `HI` digits, use _i_`.digits<LO, HI>()`. These calls can be chained, as in _i_`.base<2>().digits<8>()`. ]]
|
||||
|
||||
[[ `_i_(arg0)` ]
|
||||
[ Matches exactly the signed integral value `_RES_np_(arg0)`. ]
|
||||
@@ -505,7 +505,7 @@ attribute type is `char32_t`:
|
||||
static_assert(std::is_same_v<decltype(result), std::optional<char32_t>>));
|
||||
|
||||
The good news is that usually you don't parse characters individually. When
|
||||
you parse with _ch_, you usually parse repetition of then, which will produce
|
||||
you parse with _ch_, you usually parse repetition of them, which will produce
|
||||
a _std_str_, regardless of whether you're in Unicode parsing mode or not. If
|
||||
you do need to parse individual characters, and want to lock down their
|
||||
attribute type, you can use _cp_ and/or _cu_ to enforce a non-polymorphic
|
||||
@@ -551,7 +551,7 @@ tables below:
|
||||
[[`p1 || p2`] [`_bp_tup_<_ATTR_np_(p1), _ATTR_np_(p2)>`]]
|
||||
[[`p1 || p2 || p3`] [`_bp_tup_<_ATTR_np_(p1), _ATTR_np_(p2), _ATTR_np_(p3)>`]]
|
||||
|
||||
[[`p1 % p2`] [`std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p1)>`]]
|
||||
[[`p1 % p2`] [`std::string` if `_ATTR_np_(p1)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p1)>`]]
|
||||
|
||||
[[`p[a]`] [None.]]
|
||||
|
||||
|
||||
117
doc/tutorial.qbk
117
doc/tutorial.qbk
@@ -584,7 +584,7 @@ things:
|
||||
|
||||
* This rule object itself is called `doubles`.
|
||||
|
||||
* We've given `doubles` the diagnstic text `"doubles"` so that _Parser_ knows
|
||||
* We've given `doubles` the diagnostic text `"doubles"` so that _Parser_ knows
|
||||
how to refer to it when producing a trace of the parser during debugging.
|
||||
|
||||
Ok, so if `doubles` is a parser, what does it do? We define the rule's
|
||||
@@ -828,7 +828,7 @@ the same character must be used on both sides.
|
||||
[quoted_string_example_4]
|
||||
|
||||
Another common thing to do in a quoted string parser is to recognize escape
|
||||
sequences. If you have simple escape sequencecs that do not require any real
|
||||
sequences. If you have simple escape sequences that do not require any real
|
||||
parsing, like say the simple escape sequences from C++, you can provide a
|
||||
_symbols_ object as well. The template parameter `T` to _symbols_t_ must be
|
||||
`char` or `char32_t`. You don't need to include the escaped backslash or the
|
||||
@@ -837,10 +837,10 @@ escaped quote character, since those always work.
|
||||
[quoted_string_example_5]
|
||||
|
||||
Additionally, with each of the forms shown above, you can optionally provide a
|
||||
parser as a final argument, to will be used to parse each character inside the
|
||||
quotes. You have to provide an actual full parser here; you cannot provide a
|
||||
character or string literal. If you do not provide a character parser, _ch_
|
||||
is used.
|
||||
parser as a final argument, which will be used to parse each character inside
|
||||
the quotes. You have to provide an actual full parser here; you cannot
|
||||
provide a character or string literal. If you do not provide a character
|
||||
parser, _ch_ is used.
|
||||
|
||||
[quoted_string_example_6]
|
||||
|
||||
@@ -1159,7 +1159,7 @@ erase and clear for the current parse, and another that applies only to
|
||||
subsequent parses. The full set of operations can be found in the _symbols_
|
||||
API docs.
|
||||
|
||||
[mpte There are two versions of each of the _symbols_ `*_for_next_parse()`
|
||||
[note There are two versions of each of the _symbols_ `*_for_next_parse()`
|
||||
functions _emdash_ one that takes a context, and one that does not. The one
|
||||
with the context is meant to be used within a semantic action. The one
|
||||
without the context is for use outside of any parse.]
|
||||
@@ -1248,22 +1248,25 @@ these parsers is in a subsequent section. The attributes are repeated here so
|
||||
you can see all the properties of the parsers in one place.]
|
||||
|
||||
If you have an integral type `IntType` that is not covered by any of the
|
||||
_Parser_ parsers, you can use a more verbose declaration to declare a parser
|
||||
for `IntType`. If `IntType` were unsigned, you would use `uint_parser`. If
|
||||
it were signed, you would use `int_parser`. For example:
|
||||
_Parser_ parsers, you can explicitly specify a base/radix or bounds on the
|
||||
number of digits. You do this by calling the `base()` and `digits()` member
|
||||
functions on an existing parser of the right integral type. So if `IntType`
|
||||
were unsigned, you would use `uint_`. If it were signed, you would use
|
||||
`int_`. For example:
|
||||
|
||||
constexpr parser_interface<int_parser<IntType>> hex_int;
|
||||
constexpr auto hex_int = bp::uint_.base<16>();
|
||||
|
||||
`uint_parser` and `int_parser` accept three more non-type template parameters
|
||||
after the type parameter. They are `Radix`, `MinDigits`, and `MaxDigits`.
|
||||
`Radix` defaults to `10`, `MinDigits` to `1`, and `MaxDigits` to `-1`, which
|
||||
is a sentinel value meaning that there is no max number of digits.
|
||||
You simply chain together the constraints you want to use, like
|
||||
`.base<16>().digits<2>()` or `.digits<4>().base<8>()`.
|
||||
|
||||
So, if you wanted to parse exactly eight hexadecimal digits in a row in order
|
||||
to recognize Unicode character literals like C++ has (e.g. `\Udeadbeef`), you
|
||||
could use this parser for the digits at the end:
|
||||
|
||||
constexpr parser_interface<uint_parser<unsigned int, 16, 8, 8>> hex_int;
|
||||
constexpr auto hex_4_def = bp::uint_.base<16>().digits<8>();
|
||||
|
||||
If you want to specify an acceptable range of digits, use `.digits<LO, HI>()`.
|
||||
Both `HI` and `LO` are inclusive bounds.
|
||||
|
||||
[endsect]
|
||||
|
||||
@@ -1283,7 +1286,7 @@ parsers; we won't say much about them here.
|
||||
[heading Interaction with sequence, alternative, and permutation parsers]
|
||||
|
||||
Sequence, alternative, and permutation parsers do not nest in most cases.
|
||||
(Let's consider just sequence parsers to keep thinkgs simple, but most of this
|
||||
(Let's consider just sequence parsers to keep things simple, but most of this
|
||||
logic applies to alternative parsers as well.) `a >> b >> c` is the same as
|
||||
`(a >> b) >> c` and `a >> (b >> c)`, and they are each represented by a single
|
||||
_seq_p_ with three subparsers, `a`, `b`, and `c`. However, if something
|
||||
@@ -1692,7 +1695,7 @@ the following steps applied:
|
||||
wrapped in a `std::optional`, like `std::optional<std::variant</*...*/>>`;
|
||||
|
||||
* duplicates in the `std::variant` template parameters `<T1, T2, ... Tn>` are
|
||||
removed; every type that appears does so exacly once;
|
||||
removed; every type that appears does so exactly once;
|
||||
|
||||
* if the attribute is `std::variant<T>` or `std::optional<std::variant<T>>`,
|
||||
the attribute becomes instead `T` or `std::optional<T>`, respectively; and
|
||||
@@ -2490,8 +2493,8 @@ the earlier expectation:
|
||||
]
|
||||
|
||||
Not nearly as nice. The problem is that the expectation is on `(value %
|
||||
',')`. So, even thought we gave `value` reasonable dianostic text, we put the
|
||||
text on the wrong thing. We can introduce a new rule to put the diagnstic
|
||||
',')`. So, even though we gave `value` reasonable diagnostic text, we put the
|
||||
text on the wrong thing. We can introduce a new rule to put the diagnostic
|
||||
text in the right place.
|
||||
|
||||
namespace bp = boost::parser;
|
||||
@@ -2577,7 +2580,7 @@ Also, consider this rule:
|
||||
bp::rule<struct ints_tag, std::vector<int>> ints = "ints";
|
||||
auto const ints_def = bp::int_ >> ints | bp::eps;
|
||||
|
||||
What is the default attribute type for ints_def? It sure looks like
|
||||
What is the default attribute type for `ints_def`? It sure looks like
|
||||
`std::optional<std::vector<int>>`. Inside the evaluation of `ints`, _Parser_
|
||||
must evaluate `ints_def`, and then produce a `std::vector<int>` _emdash_ the
|
||||
return type of `ints` _emdash_ from it. How? How do you turn a
|
||||
@@ -2585,7 +2588,7 @@ return type of `ints` _emdash_ from it. How? How do you turn a
|
||||
seems obvious, but the metaprogramming that properly handles this simple
|
||||
example and the general case is certainly beyond me.
|
||||
|
||||
_Parser_ has a specific semantic for what consitutes a recursive rule. Each
|
||||
_Parser_ has a specific semantic for what constitutes a recursive rule. Each
|
||||
rule has a tag type associated with it, and if _Parser_ enters a rule with a
|
||||
certain tag `Tag`, and the currently-evaluating rule (if there is one) also
|
||||
has the tag `Tag`, then the rule instance being entered is considered to be a
|
||||
@@ -2659,7 +2662,7 @@ semantics, is a lot easier to read, and is a lot less code.]
|
||||
|
||||
The _r_ template takes another template parameter we have not discussed yet.
|
||||
You can pass a third parameter `LocalState` to _r_, which will be default
|
||||
csontructed by the _r_, and made available within semantic actions used in the
|
||||
constructed by the _r_, and made available within semantic actions used in the
|
||||
rule as `_locals_np_(ctx)`. This gives your rule some local state, if it
|
||||
needs it. The type of `LocalState` can be anything regular. It could be a
|
||||
single value, a struct containing multiple values, or a tuple, among others.
|
||||
@@ -3399,9 +3402,9 @@ _w_eh_ (see _p_api_). If you do not set one, _default_eh_ will be used.
|
||||
[heading How diagnostics are generated]
|
||||
|
||||
_Parser_ only generates error messages like the ones in this page at failed
|
||||
expectation points, like `a > b`, where you have successfully parsed `a`, but
|
||||
then cannot successfully parse `b`. This may seem limited to you. It's
|
||||
actually the best that we can do.
|
||||
expectation points (like `a > b`, where you have successfully parsed `a`, but
|
||||
then cannot successfully parse `b`), and at an unexpected end of input. This
|
||||
may seem limited to you. It's actually the best that we can do.
|
||||
|
||||
In order for error handling to happen other than at expectation points, we
|
||||
have to know that there is no further processing that might take place. This
|
||||
@@ -3409,21 +3412,26 @@ is true because _Parser_ has `P1 | P2 | ... | Pn` parsers ("`or_parser`s").
|
||||
If any one of these parsers `Pi` fails to match, it is not allowed to fail the
|
||||
parse _emdash_ the next one (`Pi+1`) might match. If we get to the end of the
|
||||
alternatives of the `or_parser` and `Pn` fails, we still cannot fail the
|
||||
top-level parse, because the `or_parser` might be a subparser within a parent
|
||||
`or_parser`.
|
||||
top-level parse, because this `or_parser` might be a subparser within a parent
|
||||
`or_parser`. The only exception to this is when: we have finished the
|
||||
top-level parse; the top-level parse is *not* a prefix parse; and there is
|
||||
still a part of the input range that is left over. In that case, there is an
|
||||
implicit expectation that the end of the parse and the end of input are the
|
||||
same location, and this implicit expectation has just been violated.
|
||||
|
||||
Ok, so what might we do? Perhaps we could at least indicate when we ran into
|
||||
end-of-input. But we cannot, for exactly the same reason already stated. For
|
||||
any parser `P`, reaching end-of-input is a failure for `P`, but not
|
||||
necessarily for the whole parse.
|
||||
Note that we cannot fail the top-level parse when we run into end-of-input.
|
||||
We cannot for exactly the same reason already stated. For any parser `P`,
|
||||
reaching end-of-input is a failure for `P`, but not necessarily for the whole
|
||||
parse.
|
||||
|
||||
Perhaps we could record the farthest point ever reached during the parse, and
|
||||
report that at the top level, if the top level parser fails. That would be
|
||||
little help without knowing which parser was active when we reached that
|
||||
point. This would require some sort of repeated memory allocation, since in
|
||||
_Parser_ the progress point of the parser is stored exclusively on the stack
|
||||
_emdash_ by the time we fail the top-level parse, all those far-reaching stack
|
||||
frames are long gone. Not the best.
|
||||
Ok, so what other kinds of error reporting might we do? Perhaps we could
|
||||
record the farthest point ever reached during the parse, and report that at
|
||||
the top level, if the top level parser fails. That would be little help
|
||||
without knowing which parser was active when we reached that point. This
|
||||
would require some sort of repeated memory allocation, since in _Parser_ the
|
||||
progress point of the parser is stored exclusively on the stack _emdash_ by
|
||||
the time we fail the top-level parse, all those far-reaching stack frames are
|
||||
long gone. Not the best.
|
||||
|
||||
Worse still, knowing how far you got in the parse and which parser was active
|
||||
is not very useful. Consider this.
|
||||
@@ -3440,15 +3448,16 @@ Was the error in the input putting the `'a'` at the beginning or putting the
|
||||
failed, and never mention `c_b`, you are potentially just steering them in the
|
||||
wrong direction.
|
||||
|
||||
All error messages must come from failed expectation points. Consider parsing
|
||||
JSON. If you open a list with `'['`, you know that you're parsing a list, and
|
||||
if the list is ill-formed, you'll get an error message saying so. If you open
|
||||
an object with `'{'`, the same thing is possible _emdash_ when missing the
|
||||
matching `'}'`, you can tell the user, "That's not an object", and this is
|
||||
useful feedback. The same thing with a partially parsed number, etc. If the
|
||||
JSON parser does not build in expectations like matched braces and brackets,
|
||||
how can _Parser_ know that a missing `'}'` is really a problem, and that no
|
||||
later parser will match the input even without the `'}'`?
|
||||
All error messages must come from failed expectation points (or unexpected end
|
||||
of input). Consider parsing JSON. If you open a list with `'['`, you know
|
||||
that you're parsing a list, and if the list is ill-formed, you'll get an error
|
||||
message saying so. If you open an object with `'{'`, the same thing is
|
||||
possible _emdash_ when missing the matching `'}'`, you can tell the user,
|
||||
"That's not an object", and this is useful feedback. The same thing with a
|
||||
partially parsed number, etc. If the JSON parser does not build in
|
||||
expectations like matched braces and brackets, how can _Parser_ know that a
|
||||
missing `'}'` is really a problem, and that no later parser will match the
|
||||
input even without the `'}'`?
|
||||
|
||||
[important The bottom line is that you should build expectation points into
|
||||
your parsers using `operator>` as much as possible.]
|
||||
@@ -3551,7 +3560,7 @@ We just define a `logging_error_handler`, and pass it by reference to _w_eh_,
|
||||
which decorates the top-level parser with the error handler. We *could not*
|
||||
have written `bp::with_error_handler(parser,
|
||||
logging_error_handler("parse.log"))`, because _w_eh_ does not accept rvalues.
|
||||
This is becuse the error handler eventually goes into the parse context. The
|
||||
This is because the error handler eventually goes into the parse context. The
|
||||
parse context only stores pointers and iterators, keeping it cheap to copy.
|
||||
|
||||
If we run the example and give it the input `"1,"`, this shows up in the log
|
||||
@@ -3599,7 +3608,7 @@ to `_trace_::off`.
|
||||
|
||||
If we trace a substantial parser, we will see a *lot* of output. Each code
|
||||
point of the input must be considered, one at a time, to see if a certain rule
|
||||
matches. An an example, let's trace a parse using the JSON parser from
|
||||
matches. As an example, let's trace a parse using the JSON parser from
|
||||
_ex_json_. The input is `"null"`. `null` is one of the types that a
|
||||
Javascript value can have; the top-level parser in the JSON parser example is:
|
||||
|
||||
@@ -3786,7 +3795,7 @@ _Parser_ seldom allocates memory. The exceptions to this are:
|
||||
which implies allocation. You can avoid this allocation by explicitly using
|
||||
a different sequence container for the attribute that does not allocate.
|
||||
`boost::container::static_vector` or C++26's `std::inplace_vector` may be
|
||||
useful as such replacements.
|
||||
useful for such replacements.
|
||||
|
||||
With the exception of allocating the name of the parser that was expected in a
|
||||
failed expectation situation, _Parser_ does not allocate unless you
|
||||
@@ -3863,9 +3872,9 @@ Some things to note:
|
||||
want to know how to fix their input. For either rule, the fix is the same:
|
||||
put a hexadecimal escape sequence there.
|
||||
|
||||
- `single_escaped_char` has a terrible-looking name. However, it's not really
|
||||
used as a name anywhere per se. In error messages, it works nicely, though.
|
||||
The error will be "Expected '"', '\', '/', 'b', 'f', 'n', 'r', or 't' here",
|
||||
- `single_escaped_char` has a terrible-looking name. However, it's not
|
||||
actually used as a name. In error messages, it works nicely, though. The
|
||||
error will be "Expected '"', '\', '/', 'b', 'f', 'n', 'r', or 't' here",
|
||||
which is pretty helpful.
|
||||
|
||||
[heading Have a simple test that you can run to find ill-formed-code-as-asserts]
|
||||
|
||||
@@ -126,7 +126,7 @@ namespace json {
|
||||
}
|
||||
};
|
||||
|
||||
bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def;
|
||||
auto const hex_4_def = boost::parser::uint_.base<16>().digits<4>();
|
||||
|
||||
auto const escape_seq_def = "\\u" > hex_4;
|
||||
|
||||
|
||||
@@ -151,12 +151,10 @@ namespace json {
|
||||
}
|
||||
};
|
||||
|
||||
// This is the verbose form of declaration for the integer and unsigned
|
||||
// integer parsers int_parser and uint_parser. In this case, we don't
|
||||
// want to use boost::parser::hex directly, since it has a variable number
|
||||
// of digits. We want to match exactly 4 digits, and this is how we
|
||||
// declare a hexadecimal parser that matches exactly 4.
|
||||
bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def;
|
||||
// We don't want to use boost::parser::hex directly, since it has a
|
||||
// variable number of digits. We want to match exactly 4 digits, and this
|
||||
// is how we declare a hexadecimal parser that matches exactly 4.
|
||||
auto const hex_4_def = boost::parser::uint_.base<16>().digits<4>();
|
||||
|
||||
// We use > here instead of >>, because once we see \u, we know that
|
||||
// exactly four hex digits must follow -- no other production rule starts
|
||||
|
||||
@@ -88,10 +88,12 @@ namespace boost::parser::detail { namespace text {
|
||||
{
|
||||
V base_ = V();
|
||||
|
||||
template<bool Const>
|
||||
class iterator;
|
||||
// HACK: SentType is here to work around irritating big-3
|
||||
// implementation inconsistencies.
|
||||
template<bool Const>
|
||||
class sentinel;
|
||||
template<bool Const, typename SentType = sentinel<Const>>
|
||||
class iterator;
|
||||
|
||||
public:
|
||||
constexpr project_view()
|
||||
@@ -140,7 +142,7 @@ namespace boost::parser::detail { namespace text {
|
||||
#else
|
||||
template<typename V, typename F>
|
||||
#endif
|
||||
template<bool Const>
|
||||
template<bool Const, typename SentType>
|
||||
class project_view<V, F>::iterator
|
||||
: public boost::parser::detail::stl_interfaces::proxy_iterator_interface<
|
||||
iterator<Const>,
|
||||
@@ -161,7 +163,7 @@ namespace boost::parser::detail { namespace text {
|
||||
decltype(detail::function_for_tag<F>(0))
|
||||
#endif
|
||||
;
|
||||
using sentinel = project_view<V, F>::sentinel<Const>;
|
||||
using sentinel = SentType;
|
||||
|
||||
friend boost::parser::detail::stl_interfaces::access;
|
||||
iterator_type & base_reference() noexcept { return it_; }
|
||||
@@ -169,7 +171,7 @@ namespace boost::parser::detail { namespace text {
|
||||
|
||||
iterator_type it_ = iterator_type();
|
||||
|
||||
friend project_view<V, F>::sentinel<Const>;
|
||||
friend project_view<V, F>::template sentinel<Const>;
|
||||
|
||||
template<bool OtherConst>
|
||||
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
|
||||
|
||||
@@ -1548,7 +1548,7 @@ namespace boost { namespace parser {
|
||||
{
|
||||
std::optional<T> retval;
|
||||
if (success)
|
||||
retval = x;
|
||||
retval = std::move(x);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -2715,20 +2715,28 @@ namespace boost { namespace parser {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename I, typename S, typename T>
|
||||
std::optional<T>
|
||||
if_full_parse(I & first, S last, std::optional<T> retval)
|
||||
template<typename I, typename S, typename ErrorHandler, typename T>
|
||||
T if_full_parse(
|
||||
I initial_first,
|
||||
I & first,
|
||||
S last,
|
||||
ErrorHandler const & error_handler,
|
||||
T retval)
|
||||
{
|
||||
if (first != last)
|
||||
retval = std::nullopt;
|
||||
return retval;
|
||||
}
|
||||
template<typename I, typename S>
|
||||
bool if_full_parse(I & first, S last, bool retval)
|
||||
{
|
||||
if (first != last)
|
||||
retval = false;
|
||||
return retval;
|
||||
if (first != last) {
|
||||
if (retval && error_handler(
|
||||
initial_first,
|
||||
last,
|
||||
parse_error<I>(first, "end of input")) ==
|
||||
error_handler_result::rethrow) {
|
||||
throw;
|
||||
}
|
||||
if constexpr (std::is_same_v<T, bool>)
|
||||
retval = false;
|
||||
else
|
||||
retval = std::nullopt;
|
||||
}
|
||||
return std::move(retval);
|
||||
}
|
||||
|
||||
// The notion of compatibility is that, given a parser with the
|
||||
@@ -2801,6 +2809,8 @@ namespace boost { namespace parser {
|
||||
{
|
||||
if constexpr (is_nope_v<ParserAttr>) {
|
||||
return nope{};
|
||||
} else if constexpr (is_optional_v<ParserAttr>) {
|
||||
return ParserAttr{};
|
||||
} else {
|
||||
using value_type = range_value_t<GivenContainerAttr>;
|
||||
return std::conditional_t<
|
||||
@@ -5401,9 +5411,28 @@ namespace boost { namespace parser {
|
||||
if constexpr (CanUseCallbacks && Context::use_callbacks) {
|
||||
call(first, last, context, skip, flags, success);
|
||||
} else {
|
||||
auto attr = call(first, last, context, skip, flags, success);
|
||||
if (success)
|
||||
detail::assign(retval, std::move(attr));
|
||||
locals_type locals = detail::make_locals<locals_type>(context);
|
||||
auto params = detail::resolve_rule_params(context, params_);
|
||||
tag_type * const tag_ptr = nullptr;
|
||||
auto const rule_context = detail::make_rule_context(
|
||||
context, tag_ptr, retval, locals, params);
|
||||
|
||||
[[maybe_unused]] auto _ = detail::scoped_trace(
|
||||
*this, first, last, rule_context, flags, retval);
|
||||
|
||||
bool dont_assign = false;
|
||||
parse_rule(
|
||||
tag_ptr,
|
||||
first,
|
||||
last,
|
||||
rule_context,
|
||||
skip,
|
||||
flags,
|
||||
success,
|
||||
dont_assign,
|
||||
retval);
|
||||
if (!success || dont_assign)
|
||||
retval = Attribute_();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5413,6 +5442,29 @@ namespace boost { namespace parser {
|
||||
|
||||
#endif
|
||||
|
||||
namespace detail {
|
||||
template<typename T>
|
||||
using base_member_function_template_expr =
|
||||
decltype(std::declval<T>().template base<2>());
|
||||
template<typename T>
|
||||
constexpr bool has_base_member_function_template_v =
|
||||
is_detected_v<base_member_function_template_expr, T>;
|
||||
|
||||
template<typename T>
|
||||
using has_digits1_member_function_template_expr =
|
||||
decltype(std::declval<T>().template digits<1>());
|
||||
template<typename T>
|
||||
constexpr bool has_digits1_member_function_template_v =
|
||||
is_detected_v<has_digits1_member_function_template_expr, T>;
|
||||
|
||||
template<typename T>
|
||||
using has_digits2_member_function_template_expr =
|
||||
decltype(std::declval<T>().template digits<1, 2>());
|
||||
template<typename T>
|
||||
constexpr bool has_digits2_member_function_template_v =
|
||||
is_detected_v<has_digits2_member_function_template_expr, T>;
|
||||
}
|
||||
|
||||
// Parser interface.
|
||||
|
||||
template<typename Parser, typename GlobalState, typename ErrorHandler>
|
||||
@@ -5752,7 +5804,7 @@ namespace boost { namespace parser {
|
||||
return parser_.call(first, last, context, skip, flags, success);
|
||||
}
|
||||
|
||||
/** Applies `parser_`, assiging the parsed attribute, if any, to
|
||||
/** Applies `parser_`, assigning the parsed attribute, if any, to
|
||||
`attr`, unless the attribute is reported via callback. */
|
||||
template<
|
||||
typename Iter,
|
||||
@@ -5772,6 +5824,60 @@ namespace boost { namespace parser {
|
||||
parser_.call(first, last, context, skip, flags, success, attr);
|
||||
}
|
||||
|
||||
/** Returns a new `parser_interface` constructed from
|
||||
`parser_.base<Radix2>()`. Note that this only works for integral
|
||||
numeric parsers like `int_` and `uint_`. */
|
||||
template<int Radix2>
|
||||
constexpr auto base() const noexcept
|
||||
{
|
||||
if constexpr (detail::has_base_member_function_template_v<
|
||||
parser_type>) {
|
||||
return parser::parser_interface{
|
||||
parser_.template base<Radix2>()};
|
||||
} else {
|
||||
static_assert(
|
||||
detail::has_base_member_function_template_v<parser_type>,
|
||||
"Only certain parsers have a .base<>() member function. "
|
||||
"This is not one of them.");
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a new `parser_interface` constructed from
|
||||
`parser_.digits<Digits>()`. Note that this only works for
|
||||
integral numeric parsers like `int_` and `uint_`. */
|
||||
template<int Digits>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
if constexpr (detail::has_digits1_member_function_template_v<
|
||||
parser_type>) {
|
||||
return parser::parser_interface{
|
||||
parser_.template digits<Digits>()};
|
||||
} else {
|
||||
static_assert(
|
||||
detail::has_digits1_member_function_template_v<parser_type>,
|
||||
"Only certain parsers have a .digits<>() member function. "
|
||||
"This is not one of them.");
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a new `parser_interface` constructed from
|
||||
`parser_.digits<MinDigits2, MaxDigits2>()`. Note that this only
|
||||
works for integral numeric parsers like `int_` and `uint_`. */
|
||||
template<int MinDigits2, int MaxDigits2>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
if constexpr (detail::has_digits2_member_function_template_v<
|
||||
parser_type>) {
|
||||
return parser::parser_interface{
|
||||
parser_.template digits<MinDigits2, MaxDigits2>()};
|
||||
} else {
|
||||
static_assert(
|
||||
detail::has_digits2_member_function_template_v<parser_type>,
|
||||
"Only certain parsers have a .digits<>() member function. "
|
||||
"This is not one of them.");
|
||||
}
|
||||
}
|
||||
|
||||
parser_type parser_;
|
||||
global_state_type globals_;
|
||||
error_handler_type error_handler_;
|
||||
@@ -6063,7 +6169,7 @@ namespace boost { namespace parser {
|
||||
SkipParser const & skip, \
|
||||
boost::parser::detail::flags flags, \
|
||||
bool & success, \
|
||||
bool & dont_assign, \
|
||||
bool & /*dont_assign*/, \
|
||||
Attribute & retval) \
|
||||
{ \
|
||||
auto const & parser = BOOST_PARSER_PP_CAT(rule_name_, _def); \
|
||||
@@ -7830,7 +7936,7 @@ namespace boost { namespace parser {
|
||||
lower;
|
||||
|
||||
/** The upper case character parser. Matches the full set of Unicode
|
||||
lower case code points (class "Lu"). */
|
||||
upper case code points (class "Lu"). */
|
||||
inline BOOST_PARSER_ALGO_CONSTEXPR
|
||||
parser_interface<char_set_parser<detail::upper_case_chars>>
|
||||
upper;
|
||||
@@ -7918,7 +8024,11 @@ namespace boost { namespace parser {
|
||||
typename Expected>
|
||||
struct uint_parser
|
||||
{
|
||||
static_assert(2 <= Radix && Radix <= 36, "Unsupported radix.");
|
||||
static_assert(
|
||||
Radix == 2 || Radix == 8 || Radix == 10 || Radix == 16,
|
||||
"Unsupported radix.");
|
||||
static_assert(1 <= MinDigits);
|
||||
static_assert(MaxDigits == -1 || MinDigits <= MaxDigits);
|
||||
|
||||
constexpr uint_parser() {}
|
||||
explicit constexpr uint_parser(Expected expected) : expected_(expected)
|
||||
@@ -7984,6 +8094,33 @@ namespace boost { namespace parser {
|
||||
return parser_interface{parser_t{expected}};
|
||||
}
|
||||
|
||||
/** Returns a `uint_parser` identical to `*this`, except that it
|
||||
parses digits as base-`Radix2` instead of base-`Radix`. */
|
||||
template<int Radix2>
|
||||
constexpr auto base() const noexcept
|
||||
{
|
||||
return uint_parser<T, Radix2, MinDigits, MaxDigits, Expected>{
|
||||
expected_};
|
||||
}
|
||||
|
||||
/** Returns a `uint_parser` identical to `*this`, except that it only
|
||||
accepts numbers that are exactly `Digits` digits long. */
|
||||
template<int Digits>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
return uint_parser<T, Radix, Digits, Digits, Expected>{expected_};
|
||||
}
|
||||
|
||||
/** Returns a `uint_parser` identical to `*this`, except that it
|
||||
only accepts numbers `D` digits long, where `D` is in
|
||||
[`MinDigits2`, `MaxDigits2`]. */
|
||||
template<int MinDigits2, int MaxDigits2>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
return uint_parser<T, Radix, MinDigits2, MaxDigits2, Expected>{
|
||||
expected_};
|
||||
}
|
||||
|
||||
Expected expected_;
|
||||
};
|
||||
|
||||
@@ -8031,6 +8168,8 @@ namespace boost { namespace parser {
|
||||
static_assert(
|
||||
Radix == 2 || Radix == 8 || Radix == 10 || Radix == 16,
|
||||
"Unsupported radix.");
|
||||
static_assert(1 <= MinDigits);
|
||||
static_assert(MaxDigits == -1 || MinDigits <= MaxDigits);
|
||||
|
||||
constexpr int_parser() {}
|
||||
explicit constexpr int_parser(Expected expected) : expected_(expected)
|
||||
@@ -8096,6 +8235,33 @@ namespace boost { namespace parser {
|
||||
return parser_interface{parser_t{expected}};
|
||||
}
|
||||
|
||||
/** Returns an `int_parser` identical to `*this`, except that it
|
||||
parses digits as base-`Radix2` instead of base-`Radix`. */
|
||||
template<int Radix2>
|
||||
constexpr auto base() const noexcept
|
||||
{
|
||||
return int_parser<T, Radix2, MinDigits, MaxDigits, Expected>{
|
||||
expected_};
|
||||
}
|
||||
|
||||
/** Returns an `int_parser` identical to `*this`, except that it only
|
||||
accepts numbers that are exactly `Digits` digits long. */
|
||||
template<int Digits>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
return int_parser<T, Radix, Digits, Digits, Expected>{expected_};
|
||||
}
|
||||
|
||||
/** Returns an `int_parser` identical to `*this`, except that it
|
||||
only accepts numbers `D` digits long, where `D` is in
|
||||
[`MinDigits2`, `MaxDigits2`]. */
|
||||
template<int MinDigits2, int MaxDigits2>
|
||||
constexpr auto digits() const noexcept
|
||||
{
|
||||
return int_parser<T, Radix, MinDigits2, MaxDigits2, Expected>{
|
||||
expected_};
|
||||
}
|
||||
|
||||
Expected expected_;
|
||||
};
|
||||
|
||||
@@ -8817,9 +8983,12 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return reset = detail::if_full_parse(
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::prefix_parse(first, last, parser, attr, trace_mode));
|
||||
}
|
||||
|
||||
@@ -8922,8 +9091,13 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return detail::if_full_parse(
|
||||
first, last, parser::prefix_parse(first, last, parser, trace_mode));
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::prefix_parse(first, last, parser, trace_mode));
|
||||
}
|
||||
|
||||
/** Parses `[first, last)` using `parser`, skipping all input recognized
|
||||
@@ -9058,9 +9232,12 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return reset = detail::if_full_parse(
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::prefix_parse(
|
||||
first, last, parser, skip, attr, trace_mode));
|
||||
}
|
||||
@@ -9169,9 +9346,12 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return detail::if_full_parse(
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::prefix_parse(first, last, parser, skip, trace_mode));
|
||||
}
|
||||
|
||||
@@ -9287,9 +9467,12 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return detail::if_full_parse(
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::callback_prefix_parse(first, last, parser, callbacks));
|
||||
}
|
||||
|
||||
@@ -9423,9 +9606,12 @@ namespace boost { namespace parser {
|
||||
auto r_ = detail::make_input_subrange(r);
|
||||
auto first = r_.begin();
|
||||
auto const last = r_.end();
|
||||
auto const initial_first = first;
|
||||
return detail::if_full_parse(
|
||||
initial_first,
|
||||
first,
|
||||
last,
|
||||
parser.error_handler_,
|
||||
parser::callback_prefix_parse(
|
||||
first, last, parser, skip, callbacks, trace_mode));
|
||||
}
|
||||
|
||||
@@ -404,8 +404,8 @@ namespace boost { namespace parser {
|
||||
and at most `MaxDigits`, producing an attribute of type `T`. Fails on
|
||||
any other input. The parse will also fail if `Expected` is anything
|
||||
but `detail::nope` (which it is by default), and the produced
|
||||
attribute is not equal to `expected_`. `Radix` must be in `[2,
|
||||
36]`. */
|
||||
attribute is not equal to `expected_`. `Radix` must be one of `2`,
|
||||
`8`, `10`, or `16`. */
|
||||
template<
|
||||
typename T,
|
||||
int Radix = 10,
|
||||
|
||||
@@ -4,5 +4,6 @@
|
||||
"authors": [ "T. Zachary Laine" ],
|
||||
"maintainers": [ "Zach Laine <whatwasthataddress -at- gmail.com>" ],
|
||||
"description": "A parser combinator library.",
|
||||
"category": [ "Parsing" ]
|
||||
"category": [ "Parsing" ],
|
||||
"cxxstd": "17"
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import testing ;
|
||||
|
||||
project
|
||||
: requirements <library>/boost/charconv//boost_charconv
|
||||
<library>/boost/parser//boost_parser
|
||||
;
|
||||
|
||||
compile compile_all_t.cpp ;
|
||||
|
||||
@@ -258,6 +258,92 @@ void github_issue_209()
|
||||
std::end(bp::detail::char_set<detail::upper_case_chars>::chars)));
|
||||
}
|
||||
|
||||
void github_issue_223()
|
||||
{
|
||||
namespace bp = boost::parser;
|
||||
|
||||
// failing case
|
||||
{
|
||||
std::vector<char> v;
|
||||
const auto parser = *('x' | bp::char_('y'));
|
||||
bp::parse("xy", parser, bp::ws, v);
|
||||
|
||||
BOOST_TEST(v.size() == 1);
|
||||
BOOST_TEST(v == std::vector<char>({'y'}));
|
||||
|
||||
// the assert fails since there are two elements in the vector: '\0'
|
||||
// and 'y'. Seems pretty surprising to me
|
||||
}
|
||||
|
||||
// working case
|
||||
{
|
||||
const auto parser = *('x' | bp::char_('y'));
|
||||
const auto result = bp::parse("xy", parser, bp::ws);
|
||||
|
||||
BOOST_TEST(result->size() == 1);
|
||||
BOOST_TEST(*(*result)[0] == 'y');
|
||||
|
||||
// success, the vector has only one 'y' element
|
||||
}
|
||||
}
|
||||
|
||||
namespace github_issue_248_ {
|
||||
namespace bp = boost::parser;
|
||||
|
||||
static constexpr bp::rule<struct symbol, int> symbol = "//";
|
||||
static constexpr bp::rule<struct vector, std::vector<int>> list =
|
||||
"<int>(,<int>)*";
|
||||
static constexpr bp::rule<struct working, std::vector<int>> working =
|
||||
"working";
|
||||
static constexpr bp::rule<struct failing, std::vector<int>> failing =
|
||||
"failing";
|
||||
|
||||
static auto const symbol_def = bp::symbols<int>{{"//", 0}};
|
||||
static constexpr auto list_def = bp::int_ % ',';
|
||||
static constexpr auto working_def = -symbol >> (bp::int_ % ',');
|
||||
static constexpr auto failing_def = -symbol >> list;
|
||||
|
||||
BOOST_PARSER_DEFINE_RULES(symbol, list, working, failing);
|
||||
}
|
||||
|
||||
void github_issue_248()
|
||||
{
|
||||
namespace bp = boost::parser;
|
||||
|
||||
using namespace github_issue_248_;
|
||||
|
||||
{
|
||||
auto const result = bp::parse("//1,2,3", working, bp::ws);
|
||||
auto const expected = std::vector<int>{0, 1, 2, 3};
|
||||
BOOST_TEST(result.has_value());
|
||||
bool const equal = std::equal(
|
||||
result->begin(), result->end(), expected.begin(), expected.end());
|
||||
BOOST_TEST(equal);
|
||||
if (!equal) {
|
||||
std::cout << "contents of *result:\n";
|
||||
for (auto x : *result) {
|
||||
std::cout << x << '\n';
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
}
|
||||
{
|
||||
auto const result = bp::parse("//1,2,3", failing, bp::ws);
|
||||
auto const expected = std::vector<int>{0, 1, 2, 3};
|
||||
BOOST_TEST(result.has_value());
|
||||
bool const equal = std::equal(
|
||||
result->begin(), result->end(), expected.begin(), expected.end());
|
||||
BOOST_TEST(equal);
|
||||
if (!equal) {
|
||||
std::cout << "contents of *result:\n";
|
||||
for (auto x : *result) {
|
||||
std::cout << x << '\n';
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
@@ -268,5 +354,7 @@ int main()
|
||||
github_issue_90();
|
||||
github_issue_125();
|
||||
github_issue_209();
|
||||
github_issue_223();
|
||||
github_issue_248();
|
||||
return boost::report_errors();
|
||||
}
|
||||
|
||||
@@ -292,6 +292,15 @@ int main()
|
||||
}
|
||||
BOOST_TEST(parse(str, parser_1));
|
||||
BOOST_TEST(!parse(str, parser_2));
|
||||
{
|
||||
BOOST_TEST(!parse(str, char_));
|
||||
std::ostringstream err, warn;
|
||||
stream_error_handler eh("", err, warn);
|
||||
BOOST_TEST(!parse(str, with_error_handler(char_, eh)));
|
||||
BOOST_TEST(
|
||||
err.str() ==
|
||||
"1:1: error: Expected end of input here:\nab\n ^\n");
|
||||
}
|
||||
}
|
||||
{
|
||||
std::string str = "ab";
|
||||
|
||||
@@ -323,8 +323,7 @@ int main()
|
||||
assert(result1);
|
||||
std::cout << *result1 << "\n"; // Prints: some text
|
||||
|
||||
auto result2 =
|
||||
bp::parse("\"some \\\"text\\\"\"", bp::quoted_string, bp::ws);
|
||||
auto result2 = bp::parse(R"("some \"text\"")", bp::quoted_string, bp::ws);
|
||||
assert(result2);
|
||||
std::cout << *result2 << "\n"; // Prints: some "text"
|
||||
//]
|
||||
|
||||
@@ -316,7 +316,7 @@ int main()
|
||||
add_parser >> roman_numerals >> next_delete_parser >>
|
||||
roman_numerals);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == std::tuple(100, 100));
|
||||
BOOST_TEST(*result == detail::hl::make_tuple(100, 100));
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user