2
0
mirror of https://github.com/boostorg/parser.git synced 2026-01-20 04:42:22 +00:00

8 Commits

Author SHA1 Message Date
Zach Laine
88abb615aa Add pre_- and post_parser members to parser_modifiers; these are used as
look-ahead/-behind positive or negative assertions.

When combining parsers using op>> and op>, if either side is an expect_parser
and the other side has an open pre_-/post_parser slot, take the subparser out
of the expect_parser, and use it directly in the pre_-/post_parser slot.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
54f8eecfe6 Change the no_case[] directive to use the parser_modifiers struct, like omit[]
does; remove no_case_parser.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
fcd257abca Generalize with_parser_mods(), so that some functions can be recursive, and
others can be non-recursive.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
2f7e5964a6 Apply the same sort of change as the previous two commits, but to all the
parsers not already modified.  Generalize with_parser_mods().  Remove
omit_parser, since it is now moot.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
2fff00b3e1 Change string_parser to support the use of parser_modifiers, and change lit()
to return a parameterized string_parser instead of string_parser wrapped in an
omit_parser.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
28e9b61735 Add a new template, parser_modifiers, that will hold common modifications to
parsers, like turning off attribute generation (replacing omit_parser).
Change char_parser to support the use of parser_modifiers, and change lit() to
return a parameterized char_parser instead of char_parser wrapped in an
omit_parser.

See #160.
2024-12-16 19:24:39 -06:00
Zach Laine
354586dd76 Github runner macos-12 -> macos-13. 2024-12-16 19:24:39 -06:00
Zach Laine
305bba875b Grooming. 2024-12-09 01:57:04 -06:00
40 changed files with 5083 additions and 10434 deletions

View File

@@ -1,4 +1,4 @@
name: macos-12 - Clang 14
name: macos-13 - Clang 14
on:
push:
@@ -16,7 +16,7 @@ jobs:
matrix:
cxx_std: [17]
runs-on: macos-12
runs-on: macos-13
steps:
- uses: actions/checkout@v4

View File

@@ -42,7 +42,6 @@
[import ../test/parser.cpp]
[import ../test/parser_rule.cpp]
[import ../test/parser_quoted_string.cpp]
[import ../test/lexer_and_parser.cpp]
[import ../include/boost/parser/concepts.hpp]
[import ../include/boost/parser/error_handling_fwd.hpp]
@@ -110,16 +109,6 @@
[def _trans_replace_vs_ [classref boost::parser::transform_replace_view `boost::parser::transform_replace_view`s]]
[def _lex_ [classref boost::parser::lexer_t `boost::parser::lexer_t`]]
[def _tok_ [classref boost::parser::token `boost::parser::token`]]
[def _toks_ [classref boost::parser::token `boost::parser::token`s]]
[def _tok_spec_ [classref boost::parser::token_spec_t `boost::parser::token_spec_t`]]
[def _tok_specs_ [classref boost::parser::token_spec_t `boost::parser::token_spec_t`s]]
[def _tok_chs_ [globalref boost::parser::token_chars `boost::parser::token_chars`]]
[def _to_tok_ [globalref boost::parser::to_tokens `boost::parser::to_tokens`]]
[def _tok_v_ [classref boost::parser::tokens_view `boost::parser::tokens_view`]]
[def _ch_id_ [globalref boost::parser::character_id `boost::parser::character_id`]]
[def _std_str_ `std::string`]
[def _std_vec_char_ `std::vector<char>`]
[def _std_vec_char32_ `std::vector<char32_t>`]
@@ -264,12 +253,6 @@
[def _udls_ [@https://en.cppreference.com/w/cpp/language/user_literal UDLs]]
[def _yaml_ [@https://yaml.org/spec/1.2/spec.html YAML 1.2]]
[def _nttp_ [@https://en.cppreference.com/w/cpp/language/template_parameters NTTP]]
[def _nttps_ [@https://en.cppreference.com/w/cpp/language/template_parameters NTTPs]]
[def _ctre_ [@https://github.com/hanickadot/compile-time-regular-expressions CTRE]]
[def _pcre_ [@https://www.pcre.org PCRE]]
[def _Spirit_ [@https://www.boost.org/doc/libs/release/libs/spirit Boost.Spirit]]
[def _spirit_reals_ [@https://www.boost.org/doc/libs/release/libs/spirit/doc/html/spirit/qi/reference/numeric/real.html real number parsers]]

View File

@@ -595,220 +595,3 @@ same attribute generation rules.
[[`p1 | p2[a] | p3`] [`std::optional<std::variant<_ATTR_np_(p1), _ATTR_np_(p3)>>`]]
]
]
[template table_token_parsers_and_their_semantics
This table lists all the _Parser_ parsers usable during token parsing. For
the callable parsers, a separate entry exists for each possible arity of
arguments. For a parser `p`, if there is no entry for `p` without arguments,
`p` is a function, and cannot itself be used as a parser; it must be called.
In the table below:
* each entry is a global object usable directly in your parsers, unless
otherwise noted;
* "code point" is used to refer to the elements of the input range, which
assumes that the parse is being done in the Unicode-aware code path (if the
parse is being done in the non-Unicode code path, read "code point" as
"`char`");
* _RES_ is a notional macro that expands to the resolution of parse argument
or evaluation of a parse predicate (see _parsers_uses_);
* "`_RES_np_(pred) == true`" is a shorthand notation for "`_RES_np_(pred)` is
contextually convertible to `bool` and `true`"; likewise for `false`;
* `c` is a character of some character type;
* `str` is a string literal of type `CharType const[]`, for some character
type `Char\Type`;
* `pred` is a parse predicate;
* `arg0`, `arg1`, `arg2`, ... are parse arguments;
* `a` is a semantic action;
* `r` is an object whose type models `parsable_range`;
* `tok` is a token parser created using _tok_spec_; and
* `p`, `p1`, `p2`, ... are parsers.
[note The definition of `parsable_range` is:
[parsable_range_concept]
]
[note Some of the parsers in this table consume no input. All parsers consume
the input they match unless otherwise stated in the table below.]
[table Token Parsers and Their Semantics
[[Parser] [Semantics] [Attribute Type] [Notes]]
[[ `tok` ]
[ Matches any token with the same ID as `tok`. ]
[ The attribute type given when specifying `tok`, or a string view if unspecified. The attribute type must be a specialization of `std::basic_string_view`, an integral type, or a floating point type. ]
[]]
[[ `tok(arg0)` ]
[ Matches exactly the value `_RES_np_(arg0)`. ]
[ The attribute type given when specifying `tok`. The attribute type must be an integral type or a floating point type. ]
[ This case applies only when `arg0` is *not* a range. ]]
[[ `tok(r)` ]
[ Matches exactly the value `r`. ]
[ The attribute type given when specifying `tok`. The attribute type must be a specialization of `std::basic_string_view`. ]
[ This overload does *not* take parse arguments. ]]
[[ _e_ ]
[ Matches /epsilon/, the empty string. Always matches, and consumes no input. ]
[ None. ]
[ Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `*_e_`, `+_e_`, etc (this applies to unconditional _e_ only). ]]
[[ `_e_(pred)` ]
[ Fails to match the input if `_RES_np_(pred) == false`. Otherwise, the semantics are those of _e_. ]
[ None. ]
[]]
[[ _ws_ ]
[ Matches a single whitespace code point (see note), according to the Unicode White_Space property. ]
[ None. ]
[ For more info, see the [@https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt Unicode properties]. _ws_ may consume one code point or two. It only consumes two code points when it matches `"\r\n"`. ]]
[[ _eol_ ]
[ Matches a single newline (see note), following the "hard" line breaks in the Unicode line breaking algorithm. ]
[ None. ]
[ For more info, see the [@https://unicode.org/reports/tr14 Unicode Line Breaking Algorithm]. _eol_ may consume one code point or two. It only consumes two code points when it matches `"\r\n"`. ]]
[[ _eoi_ ]
[ Matches only at the end of input, and consumes no input. ]
[ None. ]
[]]
[[ _attr_np_`(arg0)` ]
[ Always matches, and consumes no input. Generates the attribute `_RES_np_(arg0)`. ]
[ `decltype(_RES_np_(arg0))`. ]
[ An important use case for `_attr_` is to provide a default attribute value as a trailing alternative. For instance, an *optional* comma-delimited list is: `int_ % ',' | attr(std::vector<int>)`. Without the "`| attr(...)`", at least one `int_` match would be required. ]]
[[ _ch_ ]
[ Matches any single code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_ch_(arg0)` ]
[ Matches exactly the code point `_RES_np_(arg0)`. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_ch_(arg0, arg1)` ]
[ Matches the next code point `n` in the input, if `_RES_np_(arg0) <= n && n <= _RES_np_(arg1)`. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_ch_(r)` ]
[ Matches the next code point `n` in the input, if `n` is one of the code points in `r`. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
[ `r` is taken to be in a UTF encoding. The exact UTF used depends on `r`'s element type. If you do not pass UTF encoded ranges for `r`, the behavior of _ch_ is undefined. Note that ASCII is a subset of UTF-8, so ASCII is fine. EBCDIC is not. `r` is not copied; a reference to it is taken. The lifetime of `_ch_(r)` must be within the lifetime of `r`. This overload of _ch_ does *not* take parse arguments. Only matches tokens with the ID _ch_id_. ]]
[[ _cp_ ]
[ Matches a single code point. ]
[ `char32_t` ]
[ Similar to _ch_, but with a fixed `char32_t` attribute type; _cp_ has all the same call operator overloads as _ch_, though they are not repeated here, for brevity. Only matches tokens with the ID _ch_id_. ]]
[[ _cu_ ]
[ Matches a single code point. ]
[ `char` ]
[ Similar to _ch_, but with a fixed `char` attribute type; _cu_ has all the same call operator overloads as _ch_, though they are not repeated here, for brevity. Even though the name "`cu`" suggests that this parser matches at the code unit level, it does not. The name refers to the attribute type generated, much like the names _i_ versus _ui_. Only matches tokens with the ID _ch_id_. ]]
[[ `_blank_` ]
[ Equivalent to `_ws_ - _eol_`. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_control_` ]
[ Matches a single control-character code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_digit_` ]
[ Matches a single decimal digit code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_punct_` ]
[ Matches a single punctuation code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_hex_digit_` ]
[ Matches a single hexadecimal digit code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_lower_` ]
[ Matches a single lower-case code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ `_upper_` ]
[ Matches a single upper-case code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[ Only matches tokens with the ID _ch_id_. ]]
[[ _lit_np_`(c)`]
[ Matches exactly the given code point `c`. ]
[ None. ]
[_lit_ does *not* take parse arguments. Only matches tokens with the ID _ch_id_. ]]
[[ `c_l` ]
[ Matches exactly the given code point `c`. ]
[ None. ]
[ This is a _udl_ that represents `_lit_np_(c)`, for example `'F'_l`. Only matches tokens with the ID _ch_id_. ]]
[[ _lit_np_`(r)`]
[ Matches exactly the given string `r`. ]
[ None. ]
[ _lit_ does *not* take parse arguments. _lit_ matches the entire token or not at all. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
[[ `str_l` ]
[ Matches exactly the given string `str`. ]
[ None. ]
[ This is a _udl_ that represents `_lit_np_(s)`, for example `"a string"_l`. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
[[ `_str_np_(r)`]
[ Matches exactly `r`, and generates the match as an attribute. ]
[ _std_str_ ]
[ _str_ does *not* take parse arguments. _str_ matches the entire token or not at all. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
[[ `str_p`]
[ Matches exactly `str`, and generates the match as an attribute. ]
[ _std_str_ ]
[ This is a _udl_ that represents `_str_np_(s)`, for example `"a string"_p`. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
[[ `_rpt_np_(arg0)[p]` ]
[ Matches iff `p` matches exactly `_RES_np_(arg0)` times. ]
[ `std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p)>` ]
[ The special value _inf_ may be used; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(_inf_)[_e_]` (this applies to unconditional _e_ only). ]]
[[ `_rpt_np_(arg0, arg1)[p]` ]
[ Matches iff `p` matches between `_RES_np_(arg0)` and `_RES_np_(arg1)` times, inclusively. ]
[ `std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p)>` ]
[ The special value _inf_ may be used for the upper bound; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` and `decltype(_RES_np_(arg1))` each must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(n, _inf_)[_e_]` (this applies to unconditional _e_ only). ]]
[[ `_if_np_(pred)[p]` ]
[ Equivalent to `_e_(pred) >> p`. ]
[ `std::optional<_ATTR_np_(p)>` ]
[ It is an error to write `_if_np_(pred)`. That is, it is an error to omit the conditionally matched parser `p`. ]]
[[ `_sw_np_(arg0)(arg1, p1)(arg2, p2) ...` ]
[ Equivalent to `p1` when `_RES_np_(arg0) == _RES_np_(arg1)`, `p2` when `_RES_np_(arg0) == _RES_np_(arg2)`, etc. If there is no such `argN`, the behavior of _sw_ is undefined. ]
[ `std::variant<_ATTR_np_(p1), _ATTR_np_(p2), ...>` ]
[ It is an error to write `_sw_np_(arg0)`. That is, it is an error to omit the conditionally matched parsers `p1`, `p2`, .... ]]
[[ _symbols_t_ ]
[ _symbols_ is an associative container of key, value pairs. Each key is a _std_str_ and each value has type `T`. In the Unicode parsing path, the strings are considered to be UTF-8 encoded; in the non-Unicode path, no encoding is assumed. _symbols_ matches the longest prefix `pre` of the input that is equal to one of the keys `k`. If the length `len` of `pre` is zero, and there is no zero-length key, it does not match the input. If `len` is positive, the generated attribute is the value associated with `k`.]
[ `T` ]
[ Unlike the other entries in this table, _symbols_ is a type, not an object. ]]
]
]

View File

@@ -75,10 +75,6 @@ matches the input. _ATTR_ is a notional macro that expands to the attribute
type of the parser passed to it; `_ATTR_np_(_d_)` is `double`. This is
similar to the _attr_ type trait.
/Token parsing/ is parsing using _Parser_'s optional support for
lexing/tokenizing first, and parsing the resulting tokens, as opposed to the
normal operation of _Parser_, in which input characters are parsed.
Next, we'll look at some simple programs that parse using _Parser_. We'll
start small and build up from there.
@@ -1167,7 +1163,7 @@ without the context is for use outside of any parse.]
_Parser_ comes with all the parsers most parsing tasks will ever need. Each
one is a `constexpr` object, or a `constexpr` function. Some of the
non-functions are also callable, such as _ch_, which may be used directly, or
with arguments, as in `_ch_('a', 'z')`. Any parser that can be called,
with arguments, as in _ch_`('a', 'z')`. Any parser that can be called,
whether a function or callable object, will be called a /callable parser/ from
now on. Note that there are no nullary callable parsers; they each take one
or more arguments.
@@ -3665,452 +3661,6 @@ Some things to be aware of when looking at _Parser_ trace output:
[endsect]
[section Token parsing / Using a Lexer]
_Parser_ has optional support for lexing before parsing. The optional support
is based on an external dependency, _ctre_. _ctre_ produces a sequence of
tokens by matching a set of regexes that you provide. Each regex is used to
match against the input to produce one token with an ID associated with that
regex. When you call _p_, you pass it a lazy range of tokens that adapts the
input, and _p_ parses the tokens, not the underlying characters. When you
backtrack, you just move back to an earlier token, not an earlier place in the
underlying sequence of characters.
[heading A basic example]
Let's look at an example of how to do token parsing. First, you must include
the lexer header before the parser header.
[tokens_basics_headers]
The inclusion of this optional header is what enables token parsing.
Character parsing ("normal" parsing) is unaffected by this header inclusion
_emdash_ you can always do character parsing.
[important _ctre_ is a header-only library, and it can be included as a single
header. It requires C++20 or later, and _Parser_'s support for token parsing does
as well. _Parser_ uses the single-header version with Unicode support,
`ctre-unicode.hpp`.]
Then, you define a lexer and its tokens.
[tokens_basics_lexer]
Here, we first see three _tok_specs_. Each one consists of an _nttp_ regex
string literal and an _nttp_ token ID; the first one matches `"foo"`, and has
an ID of `0`, etc. _lex_ takes two template parameters. The first parameter
indicates that the value type of the parsed input sequence is `char`. The
second one indicates that the ID-type of all subsequent _tok_specs_ will be
`int`. We create a full lexer by starting with the `lexer<...>` expression,
followed by a piped-together sequence of _tok_specs_.
The final lexer `lexer` has a combined regex string, `"(foo)|(b.*r)|(b.+z)"`.
This string is built up at compile time, and is represented by an _nttp_. It
is the single regex given to _ctre_, which _ctre_ uses to produce a sequence
of matches from it.
`lexer` and `token_spec` are variable templates; they make variables from the
templates _lex_ and _tok_spec_, respectively. They are provided as a
notational convenience, just so you don't have to put `{}` after every lexer
and token spec you write. _lex_ and _tok_spec_ are empty classes. Their
configury is stored in _nttps_.
Next, you create a range of _toks_ from your input. This range of tokens is
what _p_ will parse.
[tokens_basics_input_range]
The input must model `std::ranges::contiguous_range`. This is due to the way
_ctre_ works; it produces a sequence of matches that are convertible to
`std::basic_string_view<CharType>`. In our case, since we are lexing a
sequence of `char`, _ctre_ will produce a sequence of `std::string_view`
matches. Note that the value type/character type we specified for _lex_ above
must match the input sequence's value type/character type, or the program is
ill-formed. Also note that because we are lexing a contiguous range of
characters, you cannot use any of the `boost::parser::as_utf*` range adaptors
when doing token parsing.
Next, you define a parser.
[tokens_basics_parser]
This has the same semantics as the character parsers you've seen in the rest
of the documentation. Each _tok_spec_ has the same interface as a parser, so
it can be used with all the parser combining operations, like `operator>>`.
However, unlike when doing character parsing, when token parsing all the
terminal parsers are restricted to a subset of the terminal parsers that are
available in character parsing (see the full list in the table below). This
is because most of the parsers in _Parser_ parse sequences of characters. For
example, if you used `_i_(42)` above instead of `foo`, the _i_ parser would
try to match two consecutive values from the input sequence, and would expect
them to equal `'4'` and `'2'`, respectively. It would instead see two tokens,
and the comparisons would not even compile.
Finally, you can put everything together in a call to _p_.
[tokens_basics_parse]
As you can see, the parse succeeded, and we got three attributes out of it.
Each attribute has the type `std::string_view`.
[heading Capture groups]
Capture groups are valid regex syntax, but you cannot use them in your
_tok_spec_ regexes. For instance, `bp::token_spec<"(foo)+", 0>` (to match one
or more consecutive `"foo"`s) will compile and run, and you will get garbage
results. _Parser_ relies on the exact number and order of capture groups to
do its token generation. If you want to group a part of your regex, use a
non-capture group, like `"(?:foo)+"`.
[heading Whitespace in token parsing]
Using the parser above, what if we tried to parse the token range `"foo baz
bar" | bp::to_tokens(lexer)` instead? Turns out, we get the same answer. You
cannot use an explicit skipper when parsing tokens. However, parsers are much
simpler when you have a notion of a skipper, especially for whitespace. So,
_lex_ has one built in; it uses `"\\s+"` by default. Whitespace is matched,
but produces no tokens. If you want to change the whitespace/skipper regex,
you can provide it when specifying the lexer. For example, here is how you
would specify the whitespace/skipped tokens to be any sequence of whitespace
characters, or any C++-style trailing comment (`// ...`).
bp::lexer<char, int, "\\s+|\\/\\/.*$">
If whitespace information is important in your parse, simply provide `""` or
the more readable convenience constant `bp::no_ws` to `lexer<>` as the
whitespace regex, and make a regular token that matches whitespace. That way,
you'll see all the whitespace in the sequence of tokens that you parse.
[heading Token attribute types]
The parser we looked at in the initial simple example produced three
`std::string_view`s, one for each token we parsed. However, we may know that
a particular token is meant to match numbers. If this is the case, we can let
_Parser_ know that we expect the token to be interpretable as a particular
type of numeric value. I'm using "numeric" for brevity, but this includes
`bool` as well. For example:
[tokens_attrs]
The attribute types for these tokens are `bool`, `std::string_view`, and
`double`, respectively. `identifier` has attribute type `std::string_view`
because that is the default if you do not specify a type.
A _tok_ is essentially a variant of `std::basic_string_view<CharType>`, `long
long`, and `long double`. The latter two types were selected because they can
fit any value of an integral or floating-point type, respectively. Even
though _tok_ effectively erases the exact type when it is integral or
floating-point, the token parser retains the information of what the exact
type is. This is why `true_false` above has an attribute type of `bool` and
not `long long`.
_ctre_ produces a sequence of substrings. Each token produced by _Parser_
gets its numeric value (if it should have one) by parsing the substring from
_ctre_ with _emdash_ you guessed it _emdash_ a _Parser_ parser. The parser
for `bool` is just _b_; the one for `int` is _i_, etc. The integral-type
parsers all support a radix/base. If you specify an integral value type for
one of your tokens, you can also specify a base, like `bp::token_spec<"\\d+",
int, 16>` to parse hex-encoded `int`s.
Part of the advantage of doing lexing before parsing is that you don't have to
reparse everything over and over again. If the subsequence `"1.23456789"` is
found in the input, you only lex it once. After that, it's already in the
right form as a floating-point number; backtracking will not provoke reparsing
of those ten characters.
[heading Single-character tokens]
Just about any parser above a certain size will have punctuation of some sort
_emdash_ elements of the input, usually a single character, that delimit other
parts of the input, like commas and braces. To make it easier to specify such
tokens, _Parser_ provides _tok_chs_. You can give _tok_chs_ a list of
individual characters, and it will create a separate, single-character regex
for each one, and add it to your lexer. Each such token will have the special
ID _ch_id_.
Note that the single character you provide must be a `char` in the ASCII range
(that is, less than `128`). If you want to use a single character that is
outside the ASCII range, just make a normal _tok_spec_ for it. Here is an
example using _tok_chs_.
[tokens_token_char]
Just like in a character parser, we can use character literals to match the
single-character tokens (`'='` and `';'` in the example above). The character
literals are turned into _ch_ parsers. _ch_ parsers that you explicitly write
may be used as well. They will only match single-character tokens, though
(that is, tokens with the ID _ch_id_).
[heading The differences between parsing characters and parsing tokens]
Even though _ch_ and _str_ (and lots of other character parsers _emdash_ see
the table below) are available when doing token parsing, their semantics are
subtly different when used for token parsing. This is because token parsing
involves parsing chunks of input as tokens, rather than individual characters.
This may sound obvious, but the implications are not. Consider this example.
[tokens_string_in_character_vs_token_parsing]
Why doesn't the token parsing case work? In the character parsing case,
_str_np_ tries to match characters from the input, one at a time; it sees
`'='` followed by `';'`, so it matches. In the token parsing case, this does
not happen. Instead, the input is broken up into two tokens (one for `'='`
and one for `';'`). `_str_np_("=;")` tries to match the first token in its
entirety, but that token is a character token, not a token with a
`std::basic_string_view` attribute. Even if that token did have
a `std::basic_string_view` attribute, it would be `"="`, not `"=;"`, and so the
match would still fail.
So, even though string matching is available using _str_, make sure you
understand that _str_ is looking for 1) a token with a string view attribute,
and 2) a full match of the token's string view against the range provided to
_str_.
_ch_ is also a bit different, since it only matches character tokens that you
make with _tok_chs_. Such tokens have the token ID _ch_id_. _ch_ will
*never* match any other kind of token. This goes for all the character
parsers (_blank_, _punct_, _upper_, etc).
The character class parsers (e.g. _punct_) are also limited in token parsing
vs. their use in character parsing. _tok_chs_ limits characters to the ASCII
range for simplicity, and to discourage parsing of sequences of tokens to find
things that are detectable using _pcre_ directly. In other words, if you need
the full set of punctuation characters, use `"\p{P}"` in one of your token
regexes, rather than trying to parse punctuation characters out of the input
using _punct_. Because _tok_chs_ limits characters to the ASCII range, all
the matching for any character class parser (like _punct_) above the ASCII
range will fail.
[important Though the string and character parsers are available, they're a
bit clunky and should be avoided in most cases. Instead, use the character
handling from the _pcre_ regex language to make the tokens you want. The best
use of string and character parsers in your _Parser_ token parsers is as
literals like `"function"`, `'='`, etc.]
One more important difference between token and character parsing is the
effect that using _lexeme_ and/or _skip_ has. If you use _lexeme_ or _skip_,
you are changing the sequence of tokens that must be in the token cache. As
such, whenever you *enter* or *leave* a _lexeme_ *or* _skip_ directive, the
token cache is flushed. The flushed tokens are everything from the current
token position to the end of the cache. If you write `bp::lexeme[p]`
frequently enough in your parsers, you could be in for some very uneven
performance.
[important Though you may be used to using _lexeme_ and _skip_ in character
parsing, prefer to write explicit token regexes that have equivalent
semantics, but operating during lexing rather than during parsing.]
[heading Parsing tokens with a specific value]
So far, we've only seen examples of parsing for a particular token. Sometimes
we want to match only occurrences of a given token with a particular value,
just like when we write something like `_ch_('a', 'z')` in a character parser.
Just as with _ch_ and most other _Parser_ parsers, you can just add the value
to match in parens after the token, like `true_false(true)` or
`identifier("exact string")`.
[heading Token IDs and diagnostics]
So far, we've only seen `int` used as the token ID type. Any integral type or
enum can be used, though. There are limitations on the values you can provide
for IDs. First, the values must all be nonnegative; negative values are
reserved for use by _Parser_. Second, the values must not exceed `2^23-1`; no
one is likely to have very many unique IDs, and token storage can be reduced a
bit by using 3 bytes for the ID instead of 4.
Using an enum has the advantage of making the code a lot clearer. For
instance:
enum class token_names { foo, bar };
auto const foo = bp::token_spec<"foo", token_names::foo>;
auto const bar = bp::token_spec<"b.r", token_names::bar>;
... reads a lot better than just using IDs like `0` and `1`.
There is another important advantage related to diagnostic messages. Consider
this parse.
constexpr auto lexer = bp::lexer<char, token_names> | foo;
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
Here is what the diagnostic looks like.
[pre
1:0: error: Expected tok<0> here:
bar
^
]
If we added a specific string value we expect, that would be included.
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo("foo"));
[pre
1:0: error: Expected tok<0>("foo") here:
bar
^
]
Instead of `"tok<N>"`, it might be nice to give the failed expectation a
user-friendly name. In character parsers we usually do this by giving _rs_
user-facing diagnostic text. This makes your parse failures much easier to
understand and correct. However, many _tok_specs_ may already have a nice
name, so why not use it? If you use enumerators for your token IDs, and make
their enumeration streamable, _Parser_ will detect this, and use the streamed
enumerator instead of `"tok<N>"`. Here is what we could have written instead.
enum class printable_tokens { foo, bar };
std::ostream & operator<<(std::ostream & os, printable_tokens tok)
{
switch (tok) {
case printable_tokens::foo: os << "foo"; break;
case printable_tokens::bar: os << "bar"; break;
}
return os;
}
auto const foo = bp::token_spec<"foo", printable_tokens::foo>;
auto const bar = bp::token_spec<"b.*r", printable_tokens::bar>;
constexpr auto lexer = bp::lexer<char, printable_tokens> | foo;
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
That results in the enumerator being printed instead.
[pre
1:0: error: Expected foo here:
bar
^
]
[important If you provide a streamable enumeration as the token ID type, this
enables the alternate printing behavior described above. If you specify a
particular value for the token parser, that value is printed as the expected
value. So the diagnostic name for `bp::token_spec<"\\d+", 3>(42)` is
`tok<3>(42)` but the name for `bp::token_spec<"\\d+",
printable_tokens::foo>(42)` is just `42` (not `foo`).]
The takeaway here is that you should use a streamable enumeration for your ID
type. It makes your code easier to read, and produces better diagnostics.
[heading Token caching]
Given that I told you earlier that we will make a sequence of tokens and
backtrack within those tokens, you may be wondering where the tokens are
stored. The _tok_v_ (the type created by the range adaptor _to_tok_) uses
internal storage or user-provided external storage to store the tokens as they
are generated. Here is an example of using external storage.
[tokens_caching_simple]
The cache could have been a `boost::container::small_vector<bp::token, N>`, or
even a `static_vector` of appropriate size, to reduce or eliminate memory
allocations.
Note the size of the cache after the parse; it still contains some tokens.
This is a special case of a more general phenomenon: the token cache grows
without bound when there are no expectation points. This is because, without
expectation points, backtracking is unbounded (refer to the _expect_pts_
section to see why). If you can go back arbitrarily far in order to backtrack,
you need to be sure that there will be a token at the place you backtrack to.
However, if you use expectation points, the cache is trimmed. The prefix of
tokens before the expectation point is erased from the token cache.
[tokens_caching_expectation_point]
Note the use of `std::ref()` to pass a reference to `cache`. This is
necessary because _to_tok_ uses `std::bind_back()` (or a workalike in C++17
mode). As with the other binders in `std`, it does not gracefully propagate
bare lvalue references, so you have to use `std::ref()`.
[heading Lexing failures]
Parse failures that fail the top-level parse happen only at expectation
points. Lexing failures that fail the top-level parse can happen at any point
in the input. If there is no token regex that matches the current point of
the input, we cannot continue to lex. Lexing failures are usually caused by
bad input, or failure to specify the correct set of _tok_specs_ to cover all
valid input. However, it may also be that you have written an impossible
_tok_spec_. Consider this one.
constexpr auto bad_token = bp::token_spec<"foo", 0, int>;
This _tok_spec_ can never generate a valid token. It will match `"foo"` in
the input, but then it will try to parse `"foo"` as an `int`, which is
guaranteed to fail.
The takeaway here is that a lexing failure might be due to bad input, but it
can also be the sign of a bug in one or more of your _tok_specs_.
[heading The token parsers]
Many of the parsers that work in character parsing do not work in token
parsing, because they try to parse individual characters from the input.
Token parsing only provides tokens, not characters. This table describes all
the parsers compatible with token parsing.
[table_token_parsers_and_their_semantics]
[heading Directives and token parsing]
One directive that works in character parsing does not work in token parsing
_emdash_ the argument form of _skip_. The argument to _skip_ is a new
skipper, and this cannot be changed in the middle of tokenization. The set of
tokens and their regexes are fixed at compile time. The nullary form of
_skip_ works fine; all it does is re-enable skipping that has been turned off
by _lexeme_.
[heading The token parsing API]
Not all the _p_ and _cbp_ overloads can do token parsing. In particular, the
overloads that take a skipper are precluded, since the skipper must be built
into the lexer itself (see the section above about whitespace handling for
details).
[heading _ctre_ particulars]
There are a few details you might want to know about how _ctre_ works.
_ctre_ uses _pcre_ as its regex grammar.
"Maximum munch" appears not to be the way _ctre_ tokenizes input. For
instance, if you have _tok_spec_ A that matches `"<=="` and _tok_spec_ B that
matches `"<|>|<=|>=|==|!="`, the input characters `"<=="` will be tokenized as
`"<=="` if the lexer includes `A | B`, but will be tokenized as `"<"` followed by
`"=="` if the lexer includes `B | A`.
_ctre_ uses `char32_t` for all its compile time strings. If you give it a
regex string literal like `bp::token_spec<"foo", 0>` (that is, an array of
`char`), it will be interpreted in one of two ways. By default, the `char`s
are copied into an array of `char32_t`, unmodified. This is fine if you
provide an ASCII regex, or a regex in a non-Unicode encoding. However, if you
define `CTRE_STRING_IS_UTF8` before including `<boost/parser/lexer.hpp>`, the
array of `char` will be interpreted as UTF-8, and will be transcoded to UTF-32
before being stored in the array of `char32_t`. All the `charN_t` character
types will be interpreted as UTF-N encoded, and will be transcoded to UTF-32
if needed. `wchar_t` is taken to mean UTF-32 *even on Windows*. Again, all
of this transcoding happens at compile time.
[heading Error handling details]
Error handling during token parsing mostly Just Works. That is, you don't
need to know or do anything special just because you are parsing tokens.
However, the error reporting functions all operate at the level of character
input, not tokens. The higher level functions provided in _err_fwd_hpp_ and
_err_hpp_ (like `write_formatted_message()`) simply get the iterators to the
underlying range of input before doing their work. The lower-level functions
provided in _err_fwd_hpp_ and _err_hpp_ (like `find_line_position()`) do not.
Each function's API documentation specifies whether or not it does this
"normalization" to underlying iterators. If you use the lower-level API
directly in your code, you can call one of the overloads of
`normalize_iterators()` to get the underlying iterators in the token parsing
case.
[endsect]
[section Memory Allocation]
_Parser_ seldom allocates memory. The exceptions to this are:

View File

@@ -31,9 +31,9 @@ struct logging_error_handler
// and rethrow. Returning fail fails the top-level parse; returning
// rethrow just re-throws the parse_error exception that got us here in
// the first place.
template<typename Iter, typename Sentinel, template<class> class Exception>
template<typename Iter, typename Sentinel>
bp::error_handler_result
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
operator()(Iter first, Sentinel last, bp::parse_error<Iter> const & e) const
{
bp::write_formatted_expectation_failure_error_message(
ofs_, filename_, first, last, e);

View File

@@ -12,8 +12,6 @@
#if defined(BOOST_PARSER_DOXYGEN) || BOOST_PARSER_USE_CONCEPTS
#include <boost/parser/lexer_fwd.hpp>
#include <ranges>
@@ -28,19 +26,15 @@ namespace boost { namespace parser {
std::same_as<std::remove_cv_t<T>, char16_t>||
std::same_as<std::remove_cv_t<T>, char32_t>;
template<typename T>
concept token_iter = is_token_v<std::iter_value_t<T>>;
template<typename T>
concept parsable_iter =
(std::forward_iterator<T> && code_unit<std::iter_value_t<T>>) ||
token_iter<T>;
std::forward_iterator<T> && code_unit<std::iter_value_t<T>>;
//[ parsable_range_like_concept
//[ parsable_range_concept
template<typename T>
concept parsable_range = (std::ranges::forward_range<T> &&
code_unit<std::ranges::range_value_t<T>>) ||
detail::is_tokens_view_v<T>;
concept parsable_range = std::ranges::forward_range<T> &&
code_unit<std::ranges::range_value_t<T>>;
//]
template<typename T>
@@ -49,6 +43,7 @@ namespace boost { namespace parser {
template<typename T>
concept parsable_range_like = parsable_range<T> || parsable_pointer<T>;
//]
template<typename T>
concept range_like = std::ranges::range<T> || parsable_pointer<T>;

View File

@@ -59,12 +59,6 @@
also defined. */
# define BOOST_PARSER_TRACE_TO_VS_OUTPUT
/** When lexing is enabled, each token contains its position within the
underlying range. To save a bit of space, an `unsigned int` is used for
this. If you parse input sequences longer than 2^32-1 characters, define
`BOOST_PARSER_TOKEN_POSITION_TYPE` to be a larger integral type. */
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int
#else
# ifdef BOOST_PARSER_NO_RUNTIME_ASSERTIONS
@@ -109,10 +103,6 @@
# define BOOST_PARSER_MAX_AGGREGATE_SIZE 25
#endif
// Fall back to `unsigned int` for the token position type unless the user has
// already defined BOOST_PARSER_TOKEN_POSITION_TYPE.
#if !defined(BOOST_PARSER_TOKEN_POSITION_TYPE)
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int
#endif
// VS2019 and VS2017 need conditional constexpr in some places, even in C++17 mode.
#if !defined(_MSC_VER) || 1930 <= _MSC_VER
# define BOOST_PARSER_CONSTEXPR constexpr
@@ -126,18 +116,4 @@
# define BOOST_PARSER_TRACE_OSTREAM std::cout
#endif
// BOOST_PARSER_DIAGNOSTIC_PUSH / BOOST_PARSER_DIAGNOSTIC_POP expand to the
// compiler-specific pragma that pushes / pops the warning state on MSVC, Clang,
// and GCC. On any other compiler both macros expand to nothing.
#if defined(_MSC_VER)
# define BOOST_PARSER_DIAGNOSTIC_PUSH __pragma(warning(push))
# define BOOST_PARSER_DIAGNOSTIC_POP __pragma(warning(pop))
#elif defined(__clang_major__)
# define BOOST_PARSER_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
# define BOOST_PARSER_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
#elif defined(__GNUC__)
# define BOOST_PARSER_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
# define BOOST_PARSER_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
#else
# define BOOST_PARSER_DIAGNOSTIC_PUSH
# define BOOST_PARSER_DIAGNOSTIC_POP
#endif
#endif

View File

@@ -197,31 +197,6 @@ namespace boost { namespace parser { namespace detail::hl {
}
// fold_n
// One step of a compile-time left fold over the indices I .. N - 1: applies
// `f` to the current state and the index constant `llong<I>{}`, and recurses
// with index I + 1 until index N - 1 has been processed.
template<std::size_t I, std::size_t N>
struct fold_n_dispatch
{
template<typename F, typename State>
constexpr static auto call(State && s, F const & f)
{
// Last index: this application's result is the result of the whole fold.
if constexpr (I + 1 == N) {
return f((State &&)s, llong<I>{});
} else {
// Otherwise this application's result is the state for the next index.
return fold_n_dispatch<I + 1, N>::call(
f((State &&)s, llong<I>{}), f);
}
}
};
// Left-folds `f` over the compile-time indices 0 .. N - 1, starting from the
// state `s`; each step calls `f(state, llong<I>{})` and its result becomes the
// state for the next index. N must be at least 1.
template<std::size_t N, typename F, typename State>
constexpr auto fold_n(State && s, F const & f)
{
    static_assert(0 < N, "fold_n must operate on sequences of length >= 1");
    // `f` is a const lvalue reference, not a forwarding reference, so it is
    // passed through unchanged. Casting it with `(F &&)` would strip its
    // constness and turn it into an rvalue.
    return hl::fold_n_dispatch<0, N>::call((State &&)s, f);
}
// size
template<typename... Args>

View File

@@ -52,110 +52,118 @@ namespace boost { namespace parser { namespace detail {
typename Parser,
typename DelimiterParser,
typename MinType,
typename MaxType>
void print_parser(
typename MaxType,
typename ParserMods>
void print_parser_impl(
Context const & context,
repeat_parser<Parser, DelimiterParser, MinType, MaxType> const & parser,
repeat_parser<
Parser,
DelimiterParser,
MinType,
MaxType,
ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser>
void print_parser(
template<typename Context, typename Parser, typename ParserMods>
void print_parser_impl(
Context const & context,
opt_parser<Parser> const & parser,
opt_parser<Parser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename ParserTuple>
void print_parser(
template<typename Context, typename ParserTuple, typename ParserMods>
void print_parser_impl(
Context const & context,
or_parser<ParserTuple> const & parser,
or_parser<ParserTuple, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename ParserTuple>
void print_parser(
template<typename Context, typename ParserTuple, typename ParserMods>
void print_parser_impl(
Context const & context,
perm_parser<ParserTuple> const & parser,
perm_parser<ParserTuple, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<
typename Context,
typename ParserTuple,
typename BacktrackingTuple,
typename CombiningGroups>
void print_parser(
typename CombiningGroups,
typename ParserMods>
void print_parser_impl(
Context const & context,
seq_parser<ParserTuple, BacktrackingTuple, CombiningGroups> const &
parser,
seq_parser<
ParserTuple,
BacktrackingTuple,
CombiningGroups,
ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser, typename Action>
void print_parser(
template<
typename Context,
typename Parser,
typename Action,
typename ParserMods>
void print_parser_impl(
Context const & context,
action_parser<Parser, Action> const & parser,
action_parser<Parser, Action, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser, typename F>
void print_parser(
template<typename Context, typename Parser, typename F, typename ParserMods>
void print_parser_impl(
Context const & context,
transform_parser<Parser, F> const & parser,
transform_parser<Parser, F, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser>
void print_parser(
template<typename Context, typename Parser, typename ParserMods>
void print_parser_impl(
Context const & context,
omit_parser<Parser> const & parser,
raw_parser<Parser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
template<typename Context, typename Parser>
void print_parser(
Context const & context,
raw_parser<Parser> const & parser,
std::ostream & os,
int components = 0);
int components);
#if defined(BOOST_PARSER_DOXYGEN) || BOOST_PARSER_USE_CONCEPTS
template<typename Context, typename Parser>
void print_parser(
template<typename Context, typename Parser, typename ParserMods>
void print_parser_impl(
Context const & context,
string_view_parser<Parser> const & parser,
string_view_parser<Parser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
#endif
template<typename Context, typename Parser>
void print_parser(
template<typename Context, typename Parser, typename ParserMods>
void print_parser_impl(
Context const & context,
lexeme_parser<Parser> const & parser,
lexeme_parser<Parser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser>
void print_parser(
template<
typename Context,
typename Parser,
typename SkipParser,
typename ParserMods>
void print_parser_impl(
Context const & context,
no_case_parser<Parser> const & parser,
skip_parser<Parser, SkipParser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Parser, typename SkipParser>
void print_parser(
template<
typename Context,
typename Parser,
expect_match_t ExpectMatch,
typename ParserMods>
void print_parser_impl(
Context const & context,
skip_parser<Parser, SkipParser> const & parser,
expect_parser<Parser, ExpectMatch, ParserMods> const & parser,
std::ostream & os,
int components = 0);
template<typename Context, typename Parser, bool FailOnMatch>
void print_parser(
Context const & context,
expect_parser<Parser, FailOnMatch> const & parser,
std::ostream & os,
int components = 0);
int components);
template<
typename Context,
@@ -163,143 +171,147 @@ namespace boost { namespace parser { namespace detail {
typename Parser,
typename Attribute,
typename LocalState,
typename ParamsTuple>
void print_parser(
typename ParamsTuple,
typename ParserMods>
void print_parser_impl(
Context const & context,
rule_parser<
UseCallbacks,
Parser,
Attribute,
LocalState,
ParamsTuple> const & parser,
ParamsTuple,
ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename T>
void print_parser(
template<typename Context, typename T, typename ParserMods>
void print_parser_impl(
Context const & context,
symbol_parser<T> const & parser,
symbol_parser<T, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Predicate>
void print_parser(
template<typename Context, typename Predicate, typename ParserMods>
void print_parser_impl(
Context const & context,
eps_parser<Predicate> const & parser,
eps_parser<Predicate, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
eps_parser<nope> const & parser,
eps_parser<nope, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
eoi_parser const & parser,
eoi_parser<ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Atribute>
void print_parser(
template<typename Context, typename Atribute, typename ParserMods>
void print_parser_impl(
Context const & context,
attr_parser<Atribute> const & parser,
attr_parser<Atribute, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Expected, typename AttributeType>
void print_parser(
template<
typename Context,
typename Expected,
typename AttributeType,
typename ParserMods>
void print_parser_impl(
Context const & context,
char_parser<Expected, AttributeType> const & parser,
char_parser<Expected, AttributeType, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
digit_parser const & parser,
digit_parser<ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
char_subrange_parser<hex_digit_subranges> const & parser,
char_subrange_parser<hex_digit_subranges, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
char_subrange_parser<control_subranges> const & parser,
char_subrange_parser<control_subranges, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
char_set_parser<punct_chars> const & parser,
char_set_parser<punct_chars, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
char_set_parser<lower_case_chars> const & parser,
char_set_parser<lower_case_chars, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
char_set_parser<upper_case_chars> const & parser,
char_set_parser<upper_case_chars, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Expected, typename AttributeType>
void print_parser(
template<
typename Context,
typename StrIter,
typename StrSentinel,
typename ParserMods>
void print_parser_impl(
Context const & context,
omit_parser<char_parser<Expected, AttributeType>> const & parser,
string_parser<StrIter, StrSentinel, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename StrIter, typename StrSentinel>
void print_parser(
template<
typename Context,
typename Quotes,
typename Escapes,
typename ParserMods>
void print_parser_impl(
Context const & context,
string_parser<StrIter, StrSentinel> const & parser,
quoted_string_parser<Quotes, Escapes, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename StrIter, typename StrSentinel>
void print_parser(
template<
typename Context,
bool NewlinesOnly,
bool NoNewlines,
typename ParserMods>
void print_parser_impl(
Context const & context,
omit_parser<string_parser<StrIter, StrSentinel>> const & parser,
ws_parser<NewlinesOnly, NoNewlines, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename Quotes, typename Escapes>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
quoted_string_parser<Quotes, Escapes> const & parser,
bool_parser<ParserMods> const & parser,
std::ostream & os,
int components = 0);
template<typename Context, bool NewlinesOnly, bool NoNewlines>
void print_parser(
Context const & context,
ws_parser<NewlinesOnly, NoNewlines> const & parser,
std::ostream & os,
int components = 0);
template<typename Context>
void print_parser(
Context const & context,
bool_parser const & parser,
std::ostream & os,
int components = 0);
int components);
template<
typename Context,
@@ -307,12 +319,19 @@ namespace boost { namespace parser { namespace detail {
int Radix,
int MinDigits,
int MaxDigits,
typename Expected>
void print_parser(
typename Expected,
typename ParserMods>
void print_parser_impl(
Context const & context,
uint_parser<T, Radix, MinDigits, MaxDigits, Expected> const & parser,
uint_parser<
T,
Radix,
MinDigits,
MaxDigits,
Expected,
ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<
typename Context,
@@ -320,45 +339,51 @@ namespace boost { namespace parser { namespace detail {
int Radix,
int MinDigits,
int MaxDigits,
typename Expected>
void print_parser(
typename Expected,
typename ParserMods>
void print_parser_impl(
Context const & context,
int_parser<T, Radix, MinDigits, MaxDigits, Expected> const & parser,
int_parser<T, Radix, MinDigits, MaxDigits, Expected, ParserMods> const &
parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename T>
void print_parser(
template<typename Context, typename T, typename ParserMods>
void print_parser_impl(
Context const & context,
float_parser<T> const & parser,
float_parser<T, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
float_parser<float> const & parser,
float_parser<float, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context>
void print_parser(
template<typename Context, typename ParserMods>
void print_parser_impl(
Context const & context,
float_parser<double> const & parser,
float_parser<double, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename SwitchValue, typename OrParser>
void print_parser(
template<
typename Context,
typename SwitchValue,
typename OrParser,
typename ParserMods>
void print_parser_impl(
Context const & context,
switch_parser<SwitchValue, OrParser> const & parser,
switch_parser<SwitchValue, OrParser, ParserMods> const & parser,
std::ostream & os,
int components = 0);
int components);
template<typename Context, typename TokenSpec, typename Expected>
template<bool SuppressOmit = false, typename Context, typename Parser>
void print_parser(
Context const & context,
token_parser<TokenSpec, Expected> const & parser,
Parser const & parser,
std::ostream & os,
int components = 0);
@@ -609,19 +634,29 @@ namespace boost { namespace parser { namespace detail {
Context const & context,
flags f,
Attribute const & attr,
std::string name);
~scoped_trace_t();
// implemented in printing_impl.hpp
std::string name) :
os_(os),
initial_first_(first),
first_(first),
last_(last),
context_(context),
flags_(f),
attr_(attr),
name_(std::move(name))
{
if (!detail::do_trace(flags_))
return;
detail::trace_prefix(os, first_, last_, context_, name_);
}
template<typename I, typename S>
void impl(I initial_first, I first, S last)
~scoped_trace_t()
{
if (!detail::do_trace(flags_))
return;
detail::trace_indent(os_, detail::_indent(context_));
if (*context_.pass_) {
os_ << "matched ";
detail::trace_input(os_, initial_first, first);
detail::trace_input(os_, initial_first_, first_);
os_ << "\n";
detail::print_attribute(
os_,
@@ -630,7 +665,7 @@ namespace boost { namespace parser { namespace detail {
} else {
os_ << "no match\n";
}
detail::trace_suffix(os_, first, last, context_, name_);
detail::trace_suffix(os_, first_, last_, context_, name_);
}
std::ostream & os_;

File diff suppressed because it is too large Load Diff

View File

@@ -47,11 +47,9 @@ namespace boost::parser::detail { namespace stl_interfaces {
`T`. */
template<typename T>
#if defined(BOOST_STL_INTERFACES_DOXYGEN) || BOOST_PARSER_USE_CONCEPTS
// clang-format off
requires std::is_object_v<T>
#endif
struct proxy_arrow_result
// clang-format on
{
constexpr proxy_arrow_result(T const & value) noexcept(
noexcept(T(value))) :
@@ -619,33 +617,25 @@ namespace boost::parser::detail { namespace stl_interfaces { BOOST_PARSER_DETAIL
using iter_concept_t = typename iter_concept<Iterator>::type;
template<typename D, typename DifferenceType>
// clang-format off
concept plus_eq = requires (D d) { d += DifferenceType(1); };
// clang-format on
concept plus_eq = requires(D d) { d += DifferenceType(1); };
template<typename D, typename D2 = D>
// clang-format off
concept base_3way =
#if defined(__cpp_impl_three_way_comparison)
requires (D d, D2 d2) { access::base(d) <=> access::base(d2); };
requires(D d, D2 d2) { access::base(d) <=> access::base(d2); };
#else
false;
#endif
// clang-format on
template<typename D1, typename D2 = D1>
// clang-format off
concept base_eq =
requires (D1 d1, D2 d2) { access::base(d1) == access::base(d2); };
// clang-format on
requires(D1 d1, D2 d2) { access::base(d1) == access::base(d2); };
template<typename D, typename D2 = D>
// clang-format off
concept iter_sub = requires (D d, D2 d2) {
concept iter_sub = requires(D d, D2 d2) {
typename D::difference_type;
{d - d2} -> std::convertible_to<typename D::difference_type>;
{ d - d2 } -> std::convertible_to<typename D::difference_type>;
};
// clang-format on
// This iterator concept -> category mapping scheme follows the one
// from zip_transform_view; see

View File

@@ -328,23 +328,20 @@ namespace boost::parser::detail { namespace stl_interfaces {
{
constexpr adaptor(F f) : f_(f) {}
// clang-format off
template<typename... Args>
constexpr auto operator()(Args &&... args) const
// clang-format on
{
#if BOOST_PARSER_DETAIL_STL_INTERFACES_USE_CONCEPTS
if constexpr (std::is_invocable_v<F const &, Args...>) {
return f_((Args &&) args...);
return f_((Args &&)args...);
} else {
return closure(
stl_interfaces::bind_back(f_, (Args &&) args...));
return closure(stl_interfaces::bind_back(f_, (Args &&)args...));
}
#else
return detail::adaptor_impl<
F const &,
detail::is_invocable_v<F const &, Args...>,
Args...>::call(f_, (Args &&) args...);
Args...>::call(f_, (Args &&)args...);
#endif
}

View File

@@ -97,17 +97,14 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
template<typename T>
concept grapheme_iter =
// clang-format off
std::input_iterator<T> &&
code_point_range<std::iter_reference_t<T>> &&
std::input_iterator<T> && code_point_range<std::iter_reference_t<T>> &&
requires(T t) {
{ t.base() } -> code_point_iter;
// clang-format on
};
{ t.base() } -> code_point_iter;
};
template<typename T>
concept grapheme_range = std::ranges::input_range<T> &&
grapheme_iter<std::ranges::iterator_t<T>>;
grapheme_iter<std::ranges::iterator_t<T>>;
template<typename R>
using code_point_iterator_t = decltype(std::declval<R>().begin().base());
@@ -116,75 +113,63 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
using code_point_sentinel_t = decltype(std::declval<R>().end().base());
template<typename T, format F>
concept grapheme_iter_code_unit =
// clang-format off
grapheme_iter<T> &&
requires(T t) {
concept grapheme_iter_code_unit = grapheme_iter<T> && requires(T t) {
{ t.base().base() } -> code_unit_iter<F>;
// clang-format on
};
template<typename T, format F>
concept grapheme_range_code_unit = grapheme_range<T> &&
concept grapheme_range_code_unit =
grapheme_range<T> &&
grapheme_iter_code_unit<std::ranges::iterator_t<T>, F>;
namespace dtl {
template<typename T, class CodeUnit>
concept eraseable_insertable_sized_bidi_range =
// clang-format off
std::ranges::sized_range<T> &&
std::ranges::input_range<T> &&
std::ranges::sized_range<T> && std::ranges::input_range<T> &&
requires(T t, CodeUnit const * it) {
{ t.erase(t.begin(), t.end()) } ->
std::same_as<std::ranges::iterator_t<T>>;
{ t.insert(t.end(), it, it) } ->
std::same_as<std::ranges::iterator_t<T>>;
{
t.erase(t.begin(), t.end())
} -> std::same_as<std::ranges::iterator_t<T>>;
{
t.insert(t.end(), it, it)
} -> std::same_as<std::ranges::iterator_t<T>>;
};
// clang-format on
}
template<typename T>
concept utf8_string =
// clang-format off
utf8_code_unit<std::ranges::range_value_t<T>> &&
dtl::eraseable_insertable_sized_bidi_range<
T, std::ranges::range_value_t<T>>;
// clang-format on
concept utf8_string = utf8_code_unit<std::ranges::range_value_t<T>> &&
dtl::eraseable_insertable_sized_bidi_range<
T,
std::ranges::range_value_t<T>>;
template<typename T>
concept utf16_string =
// clang-format off
utf16_code_unit<std::ranges::range_value_t<T>> &&
dtl::eraseable_insertable_sized_bidi_range<
T, std::ranges::range_value_t<T>>;
// clang-format on
concept utf16_string = utf16_code_unit<std::ranges::range_value_t<T>> &&
dtl::eraseable_insertable_sized_bidi_range<
T,
std::ranges::range_value_t<T>>;
template<typename T>
concept utf_string = utf8_string<T> || utf16_string<T>;
template<typename T>
// clang-format off
concept transcoding_error_handler = requires(T t, std::string_view msg) {
concept transcoding_error_handler = requires(T t, std::string_view msg) {
{ t(msg) } -> std::same_as<char32_t>;
// clang-format on
};
//]
// Clang 13 defines __cpp_lib_concepts but not std::indirectly_copyable.
#if defined(__clang_major__) && __clang_major__ <= 13
template<typename In, typename Out>
// clang-format off
concept indirectly_copyable =
std::indirectly_readable<In> &&
std::indirectly_writable<Out, std::iter_reference_t<In>>;
// clang-format on
#else
template<typename In, typename Out>
concept indirectly_copyable = std::indirectly_copyable<In, Out>;
#endif
}}}
}
}}
#endif

View File

@@ -51,13 +51,10 @@ namespace boost::parser::detail { namespace text { namespace detail {
// that is comparable with T's iterator type.
template<typename T>
concept cp_sentinel_gr_rng =
// clang-format off
grapheme_range<T> &&
!grapheme_iter<sentinel_t<T>> &&
grapheme_range<T> && !grapheme_iter<sentinel_t<T>> &&
requires(iterator_t<T> first, sentinel_t<T> last) {
{ first.base() == last } -> std::convertible_to<bool>;
// clang-format on
};
{ first.base() == last } -> std::convertible_to<bool>;
};
template<typename T>
using gr_rng_cp_iter_t = decltype(std::declval<iterator_t<T>>().base());

View File

@@ -2350,11 +2350,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf8_inserter(Cont & c, typename Cont::iterator it)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return std::insert_iterator<Cont>(c, it);
@@ -2366,11 +2364,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf16_inserter(Cont & c, typename Cont::iterator it)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_16_to_8_insert_iterator<Cont>(c, it);
@@ -2382,11 +2378,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf32_inserter(Cont & c, typename Cont::iterator it)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_32_to_8_insert_iterator<Cont>(c, it);
@@ -2398,11 +2392,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf8_back_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return std::back_insert_iterator<Cont>(c);
@@ -2414,11 +2406,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf16_back_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_16_to_8_back_insert_iterator<Cont>(c);
@@ -2430,11 +2420,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf32_back_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_32_to_8_back_insert_iterator<Cont>(c);
@@ -2446,11 +2434,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf8_front_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return std::front_insert_iterator<Cont>(c);
@@ -2462,11 +2448,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf16_front_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_16_to_8_front_insert_iterator<Cont>(c);
@@ -2478,11 +2462,9 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
}
template<typename Cont>
// clang-format off
requires requires { typename Cont::value_type; } &&
utf_code_unit<typename Cont::value_type>
utf_code_unit<typename Cont::value_type>
constexpr auto from_utf32_front_inserter(Cont & c)
// clang-format on
{
if constexpr (sizeof(typename Cont::value_type) == 1) {
return utf_32_to_8_front_insert_iterator<Cont>(c);
@@ -2492,7 +2474,6 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME
return std::front_insert_iterator<Cont>(c);
}
}
}}}
#endif

View File

@@ -423,49 +423,57 @@ namespace boost::parser::detail { namespace text {
#if defined(__cpp_char8_t)
inline constexpr detail::as_charn_impl<char8_view, format::utf8> as_char8_t;
#endif
inline constexpr detail::as_charn_impl<char16_view, format::utf16> as_char16_t;
inline constexpr detail::as_charn_impl<char32_view, format::utf32> as_char32_t;
inline constexpr detail::as_charn_impl<char16_view, format::utf16>
as_char16_t;
inline constexpr detail::as_charn_impl<char32_view, format::utf32>
as_char32_t;
// clang-format off
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
template<utf_range V>
requires std::ranges::view<V> && std::ranges::forward_range<V>
requires std::ranges::view<V> && std::ranges::forward_range<V>
#else
template<typename V>
#endif
class unpacking_view : public stl_interfaces::view_interface<unpacking_view<V>> {
V base_ = V();
class unpacking_view
: public stl_interfaces::view_interface<unpacking_view<V>>
{
V base_ = V();
public:
constexpr unpacking_view()
constexpr unpacking_view()
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
requires std::default_initializable<V>
requires std::default_initializable<V>
#endif
= default;
constexpr unpacking_view(V base) : base_(std::move(base)) {}
= default;
constexpr unpacking_view(V base) : base_(std::move(base)) {}
constexpr V base() const &
constexpr V base() const &
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
requires std::copy_constructible<V>
requires std::copy_constructible<V>
#endif
{ return base_; }
constexpr V base() && { return std::move(base_); }
{
return base_;
}
constexpr V base() && { return std::move(base_); }
constexpr auto code_units() const noexcept {
auto unpacked = boost::parser::detail::text::unpack_iterator_and_sentinel(detail::begin(base_), detail::end(base_));
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(unpacked.first, unpacked.last);
}
constexpr auto code_units() const noexcept
{
auto unpacked =
boost::parser::detail::text::unpack_iterator_and_sentinel(
detail::begin(base_), detail::end(base_));
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(
unpacked.first, unpacked.last);
}
constexpr auto begin() { return code_units().begin(); }
constexpr auto begin() const { return code_units().begin(); }
constexpr auto begin() { return code_units().begin(); }
constexpr auto begin() const { return code_units().begin(); }
constexpr auto end() { return code_units().end(); }
constexpr auto end() const { return code_units().end(); }
constexpr auto end() { return code_units().end(); }
constexpr auto end() const { return code_units().end(); }
};
template<class R>
unpacking_view(R &&) -> unpacking_view<detail::all_t<R>>;
// clang-format on
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
template<format Format, utf_range V>
@@ -693,14 +701,12 @@ namespace boost::parser::detail { namespace text {
using T = detail::remove_cv_ref_t<R>;
if constexpr (forward_range_v<T>) {
auto unpacked =
boost::parser::detail::text::unpack_iterator_and_sentinel(
detail::begin(r), detail::end(r));
boost::parser::detail::text::unpack_iterator_and_sentinel(detail::begin(r), detail::end(r));
if constexpr (is_bounded_array_v<T>) {
constexpr auto n = std::extent_v<T>;
if (n && !r[n - 1])
--unpacked.last;
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(
unpacked.first, unpacked.last);
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(unpacked.first, unpacked.last);
} else if constexpr (
!std::is_same_v<decltype(unpacked.first), iterator_t<R>> ||
!std::is_same_v<decltype(unpacked.last), sentinel_t<R>>) {

View File

@@ -31,7 +31,7 @@ namespace boost { namespace parser {
}
/** Returns the `line_position` for `it`, counting lines from the
beginning of the input `first`. Requires non-token iterators. */
beginning of the input `first`. */
template<typename Iter>
line_position<Iter> find_line_position(Iter first, Iter it)
{
@@ -57,7 +57,7 @@ namespace boost { namespace parser {
}
/** Returns the iterator to the end of the line in which `it` is
found. Requires non-token iterators. */
found. */
template<typename Iter, typename Sentinel>
Iter find_line_end(Iter it, Sentinel last)
{
@@ -73,16 +73,13 @@ namespace boost { namespace parser {
std::ostream & write_formatted_message(
std::ostream & os,
std::string_view filename,
Iter first_,
Iter it_,
Sentinel last_,
Iter first,
Iter it,
Sentinel last,
std::string_view message,
int64_t preferred_max_line_length,
int64_t max_after_caret)
{
auto [first, it, last] =
parser::normalize_iterators(first_, it_, last_);
if (!filename.empty())
os << filename << ':';
auto const position = parser::find_line_position(first, it);
@@ -121,15 +118,13 @@ namespace boost { namespace parser {
std::ostream & write_formatted_message(
std::ostream & os,
std::wstring_view filename,
Iter first_,
Iter it_,
Sentinel last_,
Iter first,
Iter it,
Sentinel last,
std::string_view message,
int64_t preferred_max_line_length,
int64_t max_after_caret)
{
auto [first, it, last] =
parser::normalize_iterators(first_, it_, last_);
auto const r = filename | parser::detail::text::as_utf8;
std::string s(r.begin(), r.end());
return parser::write_formatted_message(
@@ -144,24 +139,23 @@ namespace boost { namespace parser {
}
#endif
template<typename Iter, typename Sentinel, template<class> class Exception>
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_expectation_failure_error_message(
std::ostream & os,
std::string_view filename,
Iter first_,
Sentinel last_,
Exception<Iter> const & e,
Iter first,
Sentinel last,
parse_error<Iter> const & e,
int64_t preferred_max_line_length,
int64_t max_after_caret)
{
std::string message = "error: Expected ";
message += e.what();
auto [first, it, last] = parser::normalize_iterators(first_, e, last_);
return parser::write_formatted_message(
os,
filename,
first,
it,
e.iter,
last,
message,
preferred_max_line_length,
@@ -169,13 +163,13 @@ namespace boost { namespace parser {
}
#if defined(_MSC_VER)
template<typename Iter, typename Sentinel, template<class> class Exception>
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_expectation_failure_error_message(
std::ostream & os,
std::wstring_view filename,
Iter first,
Sentinel last,
Exception<Iter> const & e,
parse_error<Iter> const & e,
int64_t preferred_max_line_length,
int64_t max_after_caret)
{
@@ -186,35 +180,6 @@ namespace boost { namespace parser {
}
#endif
namespace detail {
template<typename I, typename S>
auto normalize_iterators_impl(I first, I it, S last)
{
if constexpr (detail::is_token_iter_v<I>)
return std::tuple(it.range_begin(), it.base(), it.range_end());
else
return std::tuple(first, it, last);
}
}
template<typename I, typename S>
auto normalize_iterators(I first, I it, S last)
{
return detail::normalize_iterators_impl(first, it, last);
}
template<typename I, typename S>
auto normalize_iterators(I first, parse_error<I> e, S last)
{
return detail::normalize_iterators_impl(first, e.iter, last);
}
template<typename I, typename S>
auto normalize_iterators(I first, lex_error<I> e, S last)
{
return detail::normalize_iterators_impl(first, e.iter, last);
}
/** An error handler that allows users to supply callbacks to handle the
reporting of warnings and errors. The reporting of errors and/or
warnings can be suppressed by supplying one or both
@@ -246,13 +211,9 @@ namespace boost { namespace parser {
filename_.assign(r.begin(), r.end());
}
#endif
template<
typename Iter,
typename Sentinel,
template<class>
class Exception>
template<typename Iter, typename Sentinel>
error_handler_result
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const
{
if (error_) {
std::stringstream ss;
@@ -299,15 +260,13 @@ namespace boost { namespace parser {
std::string filename_;
};
/** An error handler that just re-throws any exception generated by the
parse. */
struct rethrow_error_handler
{
template<
typename Iter,
typename Sentinel,
template<class>
class Exception>
template<typename Iter, typename Sentinel>
error_handler_result
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const
{
return error_handler_result::rethrow;
}
@@ -329,6 +288,8 @@ namespace boost { namespace parser {
};
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
/** An error handler that prints to the Visual Studio debugger via calls
to `OutputDebugString()`. */
struct vs_output_error_handler : stream_error_handler
{
vs_output_error_handler() :
@@ -348,9 +309,9 @@ namespace boost { namespace parser {
// implementations
template<typename Iter, typename Sentinel, template<class> class Exception>
template<typename Iter, typename Sentinel>
error_handler_result default_error_handler::operator()(
Iter first, Sentinel last, Exception<Iter> const & e) const
Iter first, Sentinel last, parse_error<Iter> const & e) const
{
parser::write_formatted_expectation_failure_error_message(
std::cerr, "", first, last, e);
@@ -382,9 +343,9 @@ namespace boost { namespace parser {
diagnose(kind, message, context, parser::_where(context).begin());
}
template<typename Iter, typename Sentinel, template<class> class Exception>
template<typename Iter, typename Sentinel>
error_handler_result stream_error_handler::operator()(
Iter first, Sentinel last, Exception<Iter> const & e) const
Iter first, Sentinel last, parse_error<Iter> const & e) const
{
std::ostream * os = err_os_;
if (!os)

View File

@@ -24,29 +24,10 @@ namespace boost { namespace parser {
template<typename Iter>
struct parse_error : std::runtime_error
{
parse_error(Iter it, std::string msg) :
runtime_error(""), message(msg), iter(it)
parse_error(Iter it, std::string const & msg) :
runtime_error(msg), iter(it)
{}
char const * what() const noexcept override { return message.c_str(); }
std::string message;
Iter iter;
};
/** The exception thrown when a lexing error is encountered, consisting of
an iterator to the point of failure, and a description of the value
expected at the point of failure in `what()`. */
template<typename Iter>
struct lex_error : std::runtime_error
{
lex_error(Iter it, std::string msg) :
runtime_error(""), message(msg), iter(it)
{}
char const * what() const noexcept override { return message.c_str(); }
std::string message;
Iter iter;
};
@@ -61,7 +42,7 @@ namespace boost { namespace parser {
};
/** Writes a formatted message (meaning prefixed with the file name, line,
and column number) to `os`. Normalizes token iterators as needed. */
and column number) to `os`. */
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_message(
std::ostream & os,
@@ -75,8 +56,7 @@ namespace boost { namespace parser {
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
/** Writes a formatted message (meaning prefixed with the file name, line,
and column number) to `os`. Normalizes token iterators as needed.
This overload is Windows-only. */
and column number) to `os`. This overload is Windows-only. */
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_message(
std::ostream & os,
@@ -90,59 +70,32 @@ namespace boost { namespace parser {
#endif
/** Writes a formatted parse-expectation failure (meaning prefixed with
the file name, line, and column number) to `os`. Normalizes token
iterators as needed. */
template<typename Iter, typename Sentinel, template<class> class Exception>
the file name, line, and column number) to `os`. */
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_expectation_failure_error_message(
std::ostream & os,
std::string_view filename,
Iter first,
Sentinel last,
Exception<Iter> const & e,
parse_error<Iter> const & e,
int64_t preferred_max_line_length = 80,
int64_t max_after_caret = 40);
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
/** Writes a formatted parse-expectation failure (meaning prefixed with
the file name, line, and column number) to `os`. Normalizes token
iterators as needed. This overload is Windows-only. */
template<typename Iter, typename Sentinel, template<class> class Exception>
the file name, line, and column number) to `os`. This overload is
Windows-only. */
template<typename Iter, typename Sentinel>
std::ostream & write_formatted_expectation_failure_error_message(
std::ostream & os,
std::wstring_view filename,
Iter first,
Sentinel last,
Exception<Iter> const & e,
parse_error<Iter> const & e,
int64_t preferred_max_line_length = 80,
int64_t max_after_caret = 40);
#endif
/** Returns a tuple of three iterators (corresponding to `first`, `curr`,
and `last`) that are suitable for use in the other error handling
functions, many of which require iterators into the underlying sequence
being parsed. For non-token parsing cases, this is effectively a
no-op; the given iterators are simply returned as-is. */
template<typename I, typename S>
auto normalize_iterators(I first, I curr, S last);
/** Returns a tuple of three iterators (corresponding to `first`, the
iterator captured in `e`, and `last`) that are suitable for use in the
other error handling functions, many of which require iterators into
the underlying sequence being parsed. For non-token parsing cases,
this is effectively a no-op; the given iterators are simply returned
as-is. */
template<typename I, typename S>
auto normalize_iterators(I first, parse_error<I> e, S last);
/** Returns a tuple of three iterators (corresponding to `first`, the
iterator captured in `e`, and `last`) that are suitable for use in the
other error handling functions, many of which require iterators into
the underlying sequence being parsed. For non-token parsing cases,
this is effectively a no-op; the given iterators are simply returned
as-is. */
template<typename I, typename S>
auto normalize_iterators(I first, lex_error<I> e, S last);
/** The kinds of diagnostics that can be handled by an error handler. */
enum class diagnostic_kind {
error, /// An error diagnostic.
@@ -156,16 +109,12 @@ namespace boost { namespace parser {
{
constexpr default_error_handler() = default;
/** Handles a `parse_error` or `lex_error` exception thrown during
parsing/lexing. A formatted parse-expectation failure is printed
to `std::cerr`. Always returns `error_handler_result::fail`. */
template<
typename Iter,
typename Sentinel,
template<class>
class Exception>
error_handler_result
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const;
/** Handles a `parse_error` exception thrown during parsing. A
formatted parse-expectation failure is printed to `std::cerr`.
Always returns `error_handler_result::fail`. */
template<typename Iter, typename Sentinel>
error_handler_result operator()(
Iter first, Sentinel last, parse_error<Iter> const & e) const;
/** Prints `message` to `std::cerr`. The diagnostic is printed with
the given `kind`, indicating the location as being at `it`. This
@@ -242,13 +191,9 @@ namespace boost { namespace parser {
formatted parse-expectation failure is printed to `*err_os_` when
`err_os_` is non-null, or `std::cerr` otherwise. Always returns
`error_handler_result::fail`. */
template<
typename Iter,
typename Sentinel,
template<class>
class Exception>
template<typename Iter, typename Sentinel>
error_handler_result
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const;
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const;
/** Let `std::ostream * s = kind == diagnostic_kind::error : err_os_ :
warn_os_`; prints `message` to `*s` when `s` is non-null, or
@@ -280,16 +225,6 @@ namespace boost { namespace parser {
std::ostream * warn_os_;
};
/** An error handler that just re-throws any exception generated by the
parse. */
struct rethrow_error_handler;
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
/** An error handler that prints to the Visual Studio debugger via calls
to `OutputDebugString()`. */
struct vs_output_error_handler;
#endif
}}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,36 +0,0 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_PARSER_LEXER_FWD_HPP
#define BOOST_PARSER_LEXER_FWD_HPP
#include <ranges>
#include <vector>
namespace boost { namespace parser {
/** A `std::views`-compatible view that provides the tokens from the given
contiguous range, using the given lexer and optional token cache. You
should typically not need to use this type directly; use
`boost::parser::to_tokens` instead. */
template<
std::ranges::contiguous_range V,
typename Lexer,
typename TokenCache = std::vector<typename Lexer::token_type>>
requires std::ranges::view<V>
struct tokens_view;
namespace detail {
template<typename T>
constexpr bool is_tokens_view_v = false;
template<typename V, typename Lexer, typename TokenCache>
constexpr bool is_tokens_view_v<tokens_view<V, Lexer, TokenCache>> =
true;
}
}}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -68,32 +68,6 @@ namespace boost { namespace parser {
return BOOST_PARSER_SUBRANGE(ptr, detail::text::null_sentinel);
}
/** The token ID used for whitespace tokens. */
inline constexpr int ws_id = -1000000;
/** The token ID used for single-character tokens. */
inline constexpr int character_id = -2000000;
#ifdef BOOST_PARSER_DOXYGEN
/** A type trait that evaluates to `true` iff `T` is a specialization of
`boost::parser::token`. */
template<typename T>
constexpr bool is_token_v = detail::foo;
#else
template<typename CharType>
struct token;
template<typename T>
constexpr bool is_token_v = false;
template<typename CharType>
constexpr bool is_token_v<token<CharType>> = true;
#endif
namespace detail {
template<typename T>
constexpr bool is_optional_v = enable_optional<T>;
@@ -173,20 +147,37 @@ namespace boost { namespace parser {
{};
struct upper_case_chars
{};
struct any_token_value
{
template<typename T>
bool matches_value(T) const
{
return true;
}
};
template<typename I, typename Context>
struct scoped_lexeme;
}
/** Whether a parser's attribute generation is turned off.  Carried as
`parser_modifiers::omit_attr`. */
enum struct omit_attr_t { no, yes };
/** Whether a parser matches case-insensitively.  Carried as
`parser_modifiers::ignore_case`. */
enum struct ignore_case_t { no, yes };
/** Whether an assertion requires its sub-parser to match (`yes`) or to
fail to match (`no`).  Used as the `ExpectMatch` parameter of
`expected_parser` and `expect_parser`. */
enum struct expect_match_t { no, yes };
/** Pairs a sub-parser `parser` of type `Parser` with a compile-time
`expect_match_t` value, exposed as `expect_match`.
NOTE(review): presumably this is the form in which a look-ahead/-behind
assertion is stored in the `pre_parser`/`post_parser` slots of
`parser_modifiers` -- confirm against the uses in parser.hpp. */
template<typename Parser, expect_match_t ExpectMatch>
struct expected_parser
{
static constexpr expect_match_t expect_match = ExpectMatch;
Parser parser;
};
/** Holds the common modifications that can be applied to a parser:
whether attribute generation is omitted (`omit_attr`), whether matching
ignores case (`ignore_case`), and an optional pre-parser and post-parser.
`PreParser` and `PostParser` default to `detail::nope`.
NOTE(review): `detail::nope` presumably marks a slot as unused, and the
pre-/post-parsers presumably act as look-ahead/-behind assertions --
confirm against the parsers that consume this type. */
template<
omit_attr_t OmitAttr = omit_attr_t::no,
ignore_case_t IgnoreCase = ignore_case_t::no,
typename PreParser = detail::nope,
typename PostParser = detail::nope>
struct parser_modifiers
{
using pre_parser_type = PreParser;
using post_parser_type = PostParser;
static constexpr omit_attr_t omit_attr = OmitAttr;
static constexpr ignore_case_t ignore_case = IgnoreCase;
// [[no_unique_address]] permits an empty parser type to occupy no
// storage, where the compiler honors the attribute.
[[no_unique_address]] PreParser pre_parser;
[[no_unique_address]] PostParser post_parser;
};
/** Repeats the application of another parser `p` of type `Parser`,
optionally applying another parser `d` of type `DelimiterParser` in
between each pair of applications of `p`. The parse succeeds if `p`
@@ -197,20 +188,21 @@ namespace boost { namespace parser {
typename Parser,
typename DelimiterParser = detail::nope,
typename MinType = int64_t,
typename MaxType = int64_t>
typename MaxType = int64_t,
typename ParserMods = parser_modifiers<>>
struct repeat_parser;
/** Repeats the application of another parser `p` of type `Parser`, `[0,
Inf)` times. The parse always succeeds. The attribute produced is a
sequence of the type of attribute produced by `Parser`. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct zero_plus_parser;
/** Repeats the application of another parser `p` of type `Parser`, `[1,
Inf)` times. The parse succeeds iff `p` succeeds at least once. The
attribute produced is a sequence of the type of attribute produced by
`Parser`. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct one_plus_parser;
/** Repeats the application of another parser `p` of type `Parser`, `[1,
@@ -219,14 +211,14 @@ namespace boost { namespace parser {
succeeds at least once, and `d` succeeds each time it is applied. The
attribute produced is a sequence of the type of attribute produced by
`Parser`. */
template<typename Parser, typename DelimiterParser>
template<typename Parser, typename DelimiterParser, typename ParserMods>
struct delimited_seq_parser;
/** Repeats the application of another parser of type `Parser`, `[0, 1]`
times. The parse always succeeds. The attribute produced is a
`std::optional<T>`, where `T` is the type of attribute produced by
`Parser`. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct opt_parser;
/** Applies each parser in `ParserTuple`, in order, stopping after the
@@ -234,7 +226,7 @@ namespace boost { namespace parser {
one of the sub-parsers succeeds. The attribute produced is a
`std::variant` over the types of attribute produced by the parsers in
`ParserTuple`. */
template<typename ParserTuple>
template<typename ParserTuple, typename ParserMods>
struct or_parser;
/** Applies each parser in `ParserTuple`, in any order, stopping after
@@ -245,7 +237,7 @@ namespace boost { namespace parser {
`ParserTuple`, not the order of the parsers' matches. It is an error
to specialize `perm_parser` with a `ParserTuple` template parameter
that includes an `eps_parser`. */
template<typename ParserTuple>
template<typename ParserTuple, typename ParserMods>
struct perm_parser;
/** Applies each parser in `ParserTuple`, in order. The parse succeeds
@@ -258,14 +250,15 @@ namespace boost { namespace parser {
template<
typename ParserTuple,
typename BacktrackingTuple,
typename CombiningGroups>
typename CombiningGroups,
typename ParserMods>
struct seq_parser;
/** Applies the given parser `p` of type `Parser` and an invocable `a` of
type `Action`. `Action` shall model `semantic_action`, and `a` will
only be invoked if `p` succeeds. The parse succeeds iff `p` succeeds.
Produces no attribute. */
template<typename Parser, typename Action>
template<typename Parser, typename Action, typename ParserMods>
struct action_parser;
/** Applies the given parser `p` of type `Parser`. The attribute produced
@@ -273,21 +266,14 @@ namespace boost { namespace parser {
only be invoked if `p` succeeds and attributes are currently being
generated. The parse succeeds iff `p` succeeds. The attribute
produced is the result of the call to `f`. */
template<typename Parser, typename F>
template<typename Parser, typename F, typename ParserMods>
struct transform_parser;
/** Applies the given parser `p` of type `Parser`. This parser produces
no attribute, and suppresses the production of any attributes that
would otherwise be produced by `p`. The parse succeeds iff `p`
succeeds. */
template<typename Parser>
struct omit_parser;
/** Applies the given parser `p` of type `Parser`; regardless of the
attribute produced by `Parser`, this parser's attribute is equivalent
to `_where(ctx)` within a semantic action on `p`. The parse succeeds
iff `p` succeeds. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct raw_parser;
#if defined(BOOST_PARSER_DOXYGEN) || BOOST_PARSER_USE_CONCEPTS
@@ -300,34 +286,30 @@ namespace boost { namespace parser {
non-contiguous, code using `string_view_parser` is ill-formed. The
parse succeeds iff `p` succeeds. This parser is only available in
C++20 and later. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct string_view_parser;
#endif
/** Applies the given parser `p` of type `Parser`, disabling the current
skipper in use, if any. The parse succeeds iff `p` succeeds. The
attribute produced is the type of attribute produced by `Parser`. */
template<typename Parser>
template<typename Parser, typename ParserMods>
struct lexeme_parser;
/** Applies the given parser `p` of type `Parser`, enabling
case-insensitive matching, based on Unicode case folding. The parse
succeeds iff `p` succeeds. The attribute produced is the type of
attribute produced by `Parser`. */
template<typename Parser>
struct no_case_parser;
/** Applies the given parser `p` of type `Parser`, using a parser of type
`SkipParser` as the skipper. The parse succeeds iff `p` succeeds.
The attribute produced is the type of attribute produced by
`Parser`. */
template<typename Parser, typename SkipParser = detail::nope>
template<
typename Parser,
typename SkipParser = detail::nope,
typename ParserMods = parser_modifiers<>>
struct skip_parser;
/** Applies the given parser `p` of type `Parser`, producing no attributes
and consuming no input. The parse succeeds iff `p`'s success is
unequal to `FailOnMatch`. */
template<typename Parser, bool FailOnMatch>
and consuming no input. The parse succeeds iff `p`'s success is equal
to `ExpectMatch == expect_match_t::yes`. */
template<typename Parser, expect_match_t ExpectMatch, typename ParserMods>
struct expect_parser;
/** Matches one of a set S of possible inputs, each of which is associated
@@ -336,7 +318,7 @@ namespace boost { namespace parser {
from S dynamically, during parsing; any such changes are reverted at
the end of parsing. The parse succeeds iff an element of S is
matched. \see `symbols` */
template<typename T>
template<typename T, typename ParserMods>
struct symbol_parser;
/** Applies another parser `p`, associated with this parser via `TagType`.
@@ -355,22 +337,24 @@ namespace boost { namespace parser {
typename TagType,
typename Attribute,
typename LocalState,
typename ParamsTuple>
typename ParamsTuple,
typename ParserMods>
struct rule_parser;
/** Matches anything, and consumes no input. If `Predicate` is anything
other than `detail::nope` (which it is by default), and `pred_(ctx)`
evaluates to false, where `ctx` is the parser context, the parse
fails. */
template<typename Predicate>
template<typename Predicate, typename ParserMods>
struct eps_parser;
/** Matches only the end of input. Produces no attribute. */
template<typename ParserMods>
struct eoi_parser;
/** Matches anything, consumes no input, and produces an attribute of type
`RESOLVE(Attribute)`. */
template<typename Attribute>
template<typename Attribute, typename ParserMods>
struct attr_parser;
/** A tag type that can be passed as the first parameter to `char_()` when
@@ -387,7 +371,10 @@ namespace boost { namespace parser {
parse fails only if the parser is constructed with a specific set of
expected code point values that does not include the matched code
point. */
template<typename Expected, typename AttributeType = void>
template<
typename Expected,
typename AttributeType = void,
typename ParserMods = parser_modifiers<>>
struct char_parser;
/** Matches a single code point that is equal to one of the code points
@@ -395,7 +382,7 @@ namespace boost { namespace parser {
characters for matching Unicode character classes like punctuation or
lower case. Attribute type is the attribute type of the character
being matched. */
template<typename Tag>
template<typename Tag, typename ParserMods>
struct char_set_parser;
/** Matches a single code point that falls into one of the subranges of
@@ -403,22 +390,26 @@ namespace boost { namespace parser {
sets of characters for matching Unicode character classes like hex
digits or control characters. Attribute type is the attribute type of
the character being matched. */
template<typename Tag>
template<typename Tag, typename ParserMods>
struct char_subrange_parser;
/** Matches a single decimal digit code point, using the Unicode character
class Hex_Digit. Attribute type is the attribute type of the
character being matched. */
template<typename ParserMods>
struct digit_parser;
/** Matches a particular string, delimited by an iterator sentinel pair;
produces no attribute. */
template<typename StrIter, typename StrSentinel>
template<typename StrIter, typename StrSentinel, typename ParserMods>
struct string_parser;
/** Matches a string delimited by quotation marks; produces a
`std::string` attribute. */
template<typename Quotes = detail::nope, typename Escapes = detail::nope>
template<
typename Quotes = detail::nope,
typename Escapes = detail::nope,
typename ParserMods = parser_modifiers<>>
struct quoted_string_parser;
/** Matches an end-of-line (`NewlinesOnly == true`), whitespace
@@ -426,11 +417,12 @@ namespace boost { namespace parser {
but not newline) code point, based on the Unicode definitions of each
(also matches the two code points `"\r\n"`). Produces no
attribute. */
template<bool NewlinesOnly, bool NoNewlines>
template<bool NewlinesOnly, bool NoNewlines, typename ParserMods>
struct ws_parser;
/** Matches the strings "true" and "false", producing an attribute of
`true` or `false`, respectively, and fails on any other input. */
template<typename ParserMods>
struct bool_parser;
/** Matches an unsigned number of radix `Radix`, of at least `MinDigits`
@@ -444,7 +436,8 @@ namespace boost { namespace parser {
int Radix = 10,
int MinDigits = 1,
int MaxDigits = -1,
typename Expected = detail::nope>
typename Expected = detail::nope,
typename ParserMods = parser_modifiers<>>
struct uint_parser;
/** Matches a signed number of radix `Radix`, of at least `MinDigits` and
@@ -458,34 +451,24 @@ namespace boost { namespace parser {
int Radix = 10,
int MinDigits = 1,
int MaxDigits = -1,
typename Expected = detail::nope>
typename Expected = detail::nope,
typename ParserMods = parser_modifiers<>>
struct int_parser;
/** Matches a floating point number, producing an attribute of type
`T`. */
template<typename T>
template<typename T, typename ParserMods>
struct float_parser;
/** A tag type used to represent a value type that is any specialization
of `std::basic_string_view`. Which specialization is used depends on
the input. */
struct string_view_tag
{};
/** Matches a token from the input with ID `TokenSpec::id`. Fails on any
other input. The parse will also fail if `Expected` is anything but
`detail::nope` (which it is by default), and `expected_.matches(attr)`
is not `true` for the produced attribute `attr`. Used in token
parsing only. */
template<typename TokenSpec, typename Expected>
struct token_parser;
/** Applies at most one of the parsers in `OrParser`. If `switch_value_`
matches one or more of the values in the parsers in `OrParser`, the
first such parser is applied, and the success or failure and attribute
of the parse are those of the applied parser. Otherwise, the parse
fails. */
template<typename SwitchValue, typename OrParser = detail::nope>
template<
typename SwitchValue,
typename OrParser = detail::nope,
typename ParserMods = parser_modifiers<>>
struct switch_parser;
/** A wrapper for parsers that provides the operations that must be

View File

@@ -77,22 +77,16 @@ namespace boost::parser {
range_rvalue_reference_t<V2>>;
#if BOOST_PARSER_USE_CONCEPTS
// clang-format off
template<typename ReplacementV, typename V>
concept concatable = requires {
typename detail::concat_reference_t<ReplacementV, V>;
typename detail::concat_value_t<ReplacementV, V>;
typename detail::concat_rvalue_reference_t<ReplacementV, V>;
};
// clang-format on
#else
template<typename ReplacementV, typename V>
// clang-format off
using concatable_expr = decltype(
std::declval<concat_reference_t<ReplacementV, V>>(),
std::declval<concat_value_t<ReplacementV, V>>(),
std::declval<concat_rvalue_reference_t<ReplacementV, V>>());
// clang-format on
using concatable_expr =
decltype(std::declval<concat_reference_t<ReplacementV, V>>(), std::declval<concat_value_t<ReplacementV, V>>(), std::declval<concat_rvalue_reference_t<ReplacementV, V>>());
template<typename ReplacementV, typename V>
constexpr bool concatable =
is_detected_v<concatable_expr, ReplacementV, V>;
@@ -107,7 +101,7 @@ namespace boost::parser {
#endif
>
#if BOOST_PARSER_USE_CONCEPTS
requires concatable<V1, V2>
requires concatable<V1, V2>
#endif
struct either_iterator_impl
: detail::stl_interfaces::iterator_interface<
@@ -169,14 +163,12 @@ namespace boost::parser {
either_iterator_impl<V1, V2>>;
#if BOOST_PARSER_USE_CONCEPTS
// clang-format off
template<typename ReplacementV, typename V>
concept replacement_for = requires (ReplacementV replacement, V base) {
concept replacement_for = requires(ReplacementV replacement, V base) {
{ either_iterator<V, ReplacementV>(replacement.begin()) };
{ either_iterator<V, ReplacementV>(replacement.end()) };
{ either_iterator<V, ReplacementV>(base.begin()) };
};
// clang-format on
#else
template<typename ReplacementV, typename V>
using replacement_for_expr = decltype(
@@ -459,7 +451,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
template<
typename V,
@@ -477,7 +469,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
namespace detail {
template<
@@ -528,23 +520,19 @@ namespace boost::parser {
typename GlobalState,
typename ErrorHandler,
typename SkipParser>
requires
// clang-format off
std::ranges::viewable_range<R> &&
std::ranges::viewable_range<ReplacementR> &&
// clang-format on
can_replace_view<
to_range_t<R>,
decltype(to_range<
ReplacementR,
true,
detail::range_utf_format_v<R>>::
call(std::declval<ReplacementR>())),
Parser,
GlobalState,
ErrorHandler,
SkipParser>
// clang-format off
requires std::ranges::viewable_range<R> &&
std::ranges::viewable_range<ReplacementR> &&
can_replace_view<
to_range_t<R>,
decltype(to_range<
ReplacementR,
true,
detail::range_utf_format_v<R>>::
call(std::declval<ReplacementR>())),
Parser,
GlobalState,
ErrorHandler,
SkipParser>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
@@ -552,10 +540,9 @@ namespace boost::parser {
parser_interface<SkipParser> const & skip,
ReplacementR && replacement,
trace trace_mode = trace::off) const
// clang-format on
{
return replace_view(
to_range<R>::call((R &&) r),
to_range<R>::call((R &&)r),
parser,
skip,
to_range<
@@ -572,36 +559,33 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler>
requires
// clang-format off
std::ranges::viewable_range<R> &&
std::ranges::viewable_range<ReplacementR> &&
// clang-format on
can_replace_view<
to_range_t<R>,
decltype(to_range<
ReplacementR,
true,
detail::range_utf_format_v<R>>::
call(std::declval<ReplacementR>())),
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>
// clang-format off
requires std::ranges::viewable_range<R> &&
std::ranges::viewable_range<ReplacementR> &&
can_replace_view<
to_range_t<R>,
decltype(to_range<
ReplacementR,
true,
detail::range_utf_format_v<R>>::
call(std::declval<ReplacementR>())),
Parser,
GlobalState,
ErrorHandler,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
ReplacementR && replacement,
trace trace_mode = trace::off) const
// clang-format on
{
return (*this)(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
(ReplacementR &&) replacement,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
(ReplacementR &&)replacement,
trace_mode);
}
@@ -641,10 +625,11 @@ namespace boost::parser {
std::is_same_v<Trace, trace>) {
// (r, parser, replacement, trace) case
return impl(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
(SkipParser &&) skip,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
(SkipParser &&)skip,
replacement);
} else {
static_assert(

View File

@@ -116,7 +116,9 @@ namespace boost::parser {
return BOOST_PARSER_SUBRANGE(first, first);
auto const search_parser = omit[*(char_ - parser)] >> -raw[parser];
if constexpr (std::is_same_v<SkipParser, eps_parser<phony>>) {
if constexpr (std::is_same_v<
SkipParser,
eps_parser<phony, parser_modifiers<>>>) {
auto result = parser::prefix_parse(
first, last, search_parser, trace_mode);
if (*result)
@@ -255,9 +257,9 @@ namespace boost::parser {
trace trace_mode = trace::off)
{
return parser::search(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>{},
trace_mode);
}
@@ -292,7 +294,7 @@ namespace boost::parser {
return parser::search(
BOOST_PARSER_SUBRANGE(first, last),
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>{},
trace_mode);
}
@@ -483,7 +485,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
template<
typename V,
@@ -496,7 +498,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
namespace detail {
template<
@@ -541,25 +543,21 @@ namespace boost::parser {
typename GlobalState,
typename ErrorHandler,
typename SkipParser>
requires(
std::ranges::viewable_range<R>) &&
can_search_all_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
// clang-format off
requires(std::ranges::viewable_range<R>) && can_search_all_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
parser_interface<SkipParser> const & skip,
trace trace_mode = trace::off) const
// clang-format on
{
return search_all_view(
to_range<R>::call((R &&) r), parser, skip, trace_mode);
to_range<R>::call((R &&)r), parser, skip, trace_mode);
}
template<
@@ -567,26 +565,25 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler>
requires(
std::ranges::viewable_range<R>) &&
can_search_all_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>
// clang-format off
requires(std::ranges::viewable_range<R>) &&
can_search_all_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
trace trace_mode = trace::off) const
// clang-format on
{
return (*this)(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
trace_mode);
}
@@ -597,8 +594,8 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler,
typename SkipParser =
parser_interface<eps_parser<detail::phony>>,
typename SkipParser = parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>,
typename Trace = trace,
typename Enable = std::enable_if_t<is_parsable_range_v<R>>>
[[nodiscard]] constexpr auto operator()(
@@ -614,9 +611,10 @@ namespace boost::parser {
std::is_same_v<Trace, trace>) {
// (r, parser, trace) case
return impl(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
skip);
} else if constexpr (
detail::is_parser_iface<SkipParser> &&

View File

@@ -193,14 +193,13 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler>
split_view(
V &&, parser_interface<Parser, GlobalState, ErrorHandler>, trace)
split_view(V &&, parser_interface<Parser, GlobalState, ErrorHandler>, trace)
-> split_view<
detail::text::detail::all_t<V>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
template<
typename V,
@@ -213,7 +212,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
namespace detail {
template<
@@ -258,25 +257,21 @@ namespace boost::parser {
typename GlobalState,
typename ErrorHandler,
typename SkipParser>
requires(
std::ranges::viewable_range<R>) &&
can_split_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
// clang-format off
requires(std::ranges::viewable_range<R>) && can_split_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
parser_interface<SkipParser> const & skip,
trace trace_mode = trace::off) const
// clang-format on
{
return split_view(
to_range<R>::call((R &&) r), parser, skip, trace_mode);
to_range<R>::call((R &&)r), parser, skip, trace_mode);
}
template<
@@ -284,26 +279,25 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler>
requires(
std::ranges::viewable_range<R>) &&
can_split_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>
// clang-format off
requires(std::ranges::viewable_range<R>) &&
can_split_view<
to_range_t<R>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
trace trace_mode = trace::off) const
// clang-format on
{
return (*this)(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
trace_mode);
}
@@ -314,8 +308,8 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler,
typename SkipParser =
parser_interface<eps_parser<detail::phony>>,
typename SkipParser = parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>,
typename Trace = trace,
typename Enable = std::enable_if_t<is_parsable_range_v<R>>>
[[nodiscard]] constexpr auto operator()(
@@ -331,9 +325,10 @@ namespace boost::parser {
std::is_same_v<Trace, trace>) {
// (r, parser, trace) case
return impl(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
skip);
} else if constexpr (
detail::is_parser_iface<SkipParser> &&

View File

@@ -1,305 +0,0 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_PARSER_TOKEN_PARSER_HPP
#define BOOST_PARSER_TOKEN_PARSER_HPP
#if !defined(BOOST_PARSER_PARSER_HPP) || !defined(BOOST_PARSER_LEXER_HPP)
#error "token_parser.hpp must be included after lexer.hpp and parser.hpp."
#endif
#include <boost/parser/parser_fwd.hpp>
#include <boost/parser/concepts.hpp>
#include <boost/parser/error_handling.hpp>
#include <algorithm>
namespace boost { namespace parser {
namespace detail {
/** Extracts the value carried by `tok` as an `AttributeType`.  Which
    accessor is consulted depends on `AttributeType`: floating-point types
    read the token's `long double` value, integral types read its `long
    long` value, and everything else reads its string view.  Returns
    `std::nullopt` when the token does not hold that kind of value. */
template<typename AttributeType, typename CharType>
std::optional<AttributeType> token_as(token<CharType> tok)
{
    if constexpr (std::is_floating_point_v<AttributeType>) {
        if (!tok.has_long_double())
            return std::nullopt;
        return static_cast<AttributeType>(tok.get_long_double());
    } else if constexpr (std::is_integral_v<AttributeType>) {
        if (!tok.has_long_long())
            return std::nullopt;
        return static_cast<AttributeType>(tok.get_long_long());
    } else {
        if (!tok.has_string_view())
            return std::nullopt;
        return tok.get_string_view();
    }
}
/** Expected-value predicate used by `token_parser`: a token's value
    matches when it compares equal to the stored expected value, after
    that stored value has been passed through `detail::resolve()` together
    with the parse context. */
template<typename Expected>
struct token_with_value
{
    /** Stores `value` as the expected value.  The parameter is a sink: it
        is taken by value and moved into the member, so an rvalue argument
        is not copied a second time. */
    explicit constexpr token_with_value(Expected value) :
        expected_(std::move(value))
    {}

    /** Returns `true` iff `value` equals the resolved expected value. */
    template<typename T, typename Context>
    bool matches(T value, Context const & context) const
    {
        return value == detail::resolve(context, expected_);
    }

    Expected expected_;
};
/** Expected-value predicate used by `token_parser` for string-valued
    tokens: a token's string view matches when it is element-wise equal to
    the stored range.  For any `CharType` other than `char`, both sides are
    viewed through `detail::text::as_utf32` before being compared. */
template<typename Subrange>
struct token_with_string_view
{
    explicit constexpr token_with_string_view(Subrange subrange) :
        subrange_(subrange)
    {}

    /** Returns `true` iff `value` and the stored range have the same
        length and pairwise-equal elements (see `cast_char()`). */
    template<typename CharType, typename Context>
    bool matches(
        std::basic_string_view<CharType> value, Context const &) const
    {
        auto const same_element = [](auto lhs, auto rhs) {
            return cast_char(lhs) == cast_char(rhs);
        };
        auto const actual =
            make_subrange<CharType>(value.begin(), value.end());
        auto const expected =
            make_subrange<CharType>(subrange_.begin(), subrange_.end());
        return std::ranges::equal(actual, expected, same_element);
    }

    /** Maps `char` to `unsigned char` before comparison; every other type
        is passed through unchanged. */
    template<typename T>
    static auto cast_char(T c)
    {
        if constexpr (!std::is_same_v<T, char>) {
            return c;
        } else {
            return static_cast<unsigned char>(c);
        }
    }

    /** Wraps `[f, l)` in a subrange; for any `CharType` other than `char`
        the subrange is additionally adapted with `as_utf32`. */
    template<typename CharType, typename I, typename S>
    static auto make_subrange(I f, S l)
    {
        auto whole = BOOST_PARSER_SUBRANGE(f, l);
        if constexpr (!std::is_same_v<CharType, char>) {
            return whole | detail::text::as_utf32;
        } else {
            return whole;
        }
    }

    Subrange subrange_;
};
}
#ifndef BOOST_PARSER_DOXYGEN
template<typename TokenSpec, typename Expected>
struct token_parser
{
using token_spec = TokenSpec;
template<typename Iter>
using attribute_type = std::conditional_t<
std::same_as<typename token_spec::value_type, string_view_tag>,
std::basic_string_view<
typename detail::iter_value_t<Iter>::char_type>,
typename token_spec::value_type>;
constexpr token_parser() = default;
constexpr token_parser(Expected expected) : expected_(expected) {}
template<
typename Iter,
typename Sentinel,
typename Context,
typename SkipParser>
auto call(
Iter & first,
Sentinel last,
Context const & context,
SkipParser const & skip,
detail::flags flags,
bool & success) const -> attribute_type<Iter>
{
attribute_type<Iter> retval;
call(first, last, context, skip, flags, success, retval);
return retval;
}
template<
typename Iter,
typename Sentinel,
typename Context,
typename SkipParser,
typename Attribute>
void call(
Iter & first,
Sentinel last,
Context const & context,
SkipParser const & skip,
detail::flags flags,
bool & success,
Attribute & retval) const
{
using value_type = std::remove_cvref_t<decltype(*first)>;
static_assert(
is_token_v<value_type>,
"token_parser can only be used when parsing sequences of "
"tokens.");
[[maybe_unused]] auto _ = detail::scoped_trace(
*this, first, last, context, flags, retval);
if (first == last) {
success = false;
return;
}
value_type const x = *first;
if (x.id() != (int)token_spec::id) {
success = false;
return;
}
constexpr bool use_expected = !std::same_as<Expected, detail::nope>;
if (use_expected || detail::gen_attrs(flags)) {
auto opt_attr = detail::token_as<attribute_type<Iter>>(x);
if constexpr (use_expected) {
if (!opt_attr || !expected_.matches(*opt_attr, context)) {
success = false;
return;
}
}
if (detail::gen_attrs(flags))
detail::assign(retval, *opt_attr);
}
++first;
}
/** Returns a `parser_interface` containing a `token_parser` that
matches `value`. */
template<typename T>
requires(!parsable_range_like<T>)
constexpr auto operator()(T value) const noexcept
{
BOOST_PARSER_ASSERT(
(detail::is_nope_v<Expected> &&
"If you're seeing this, you tried to chain calls on one of "
"your token_spec's, like 'my_token_spec(id1)(id2)'. Quit "
"it!'"));
return parser_interface(
token_parser<TokenSpec, detail::token_with_value<T>>(
detail::token_with_value(std::move(value))));
}
/** Returns a `parser_interface` containing a `token_parser` that
matches the range `r`. If the token being matched during the
parse has a `char_type` of `char8_t`, `char16_t`, or `char32_t`,
the elements of `r` are transcoded from their presumed encoding to
UTF-32 during the comparison. Otherwise, the character being
matched is directly compared to the elements of `r`. */
template<parsable_range_like R>
constexpr auto operator()(R && r) const noexcept
{
BOOST_PARSER_ASSERT(
((!std::is_rvalue_reference_v<R &&> ||
!detail::is_range<detail::remove_cv_ref_t<R>>) &&
"It looks like you tried to pass an rvalue range to "
"token_spec(). Don't do that, or you'll end up with dangling "
"references."));
BOOST_PARSER_ASSERT(
(detail::is_nope_v<Expected> &&
"If you're seeing this, you tried to chain calls on "
"token_spec, like 'token_spec(char-set)(char-set)'. Quit "
"it!'"));
auto expected =
detail::token_with_string_view{make_expected_range((R &&)r)};
return parser_interface(
token_parser<token_spec, decltype(expected)>(expected));
}
template<typename R>
static constexpr auto make_expected_range(R && r)
{
using T = detail::remove_cv_ref_t<R>;
if constexpr (std::is_bounded_array_v<T>) {
constexpr auto n = std::extent_v<T>;
auto const offset = n && !r[n - 1] ? 1 : 0;
return BOOST_PARSER_SUBRANGE(
std::ranges::begin(r), std::ranges::end(r) - offset);
} else {
return BOOST_PARSER_SUBRANGE(
std::ranges::begin(r), std::ranges::end(r));
}
}
// TODO: Consider adding a special string_view-like type that can be
// passed to the range overload above. It would be based on
// adobe::name_t. When comparing it to a tokens' string_view, if it
// matches, it would replace the token's string_view, so that
// subsequent comparisons are O(1) in the length of the string.
Expected expected_;
};
#endif
/** A variable template that defines a token parser associated with
`boost::parser::token_spec_t<Regex, ID, ValueType, Base>`. This token
parser can be used to specify a lexer, and may also be used in
parsers.

The value is a `parser_interface` wrapping a default-constructed
`token_parser` whose `Expected` parameter is `detail::nope`, i.e. one
that places no constraint on the matched token's value. `ValueType`
defaults to `string_view_tag` and `Base` defaults to `10`.

NOTE(review): `Base` is presumably the radix used when converting the
matched text to an integral `ValueType` -- confirm against
`token_spec_t`. */
template<
ctll::fixed_string Regex,
auto ID,
typename ValueType = string_view_tag,
int Base = 10>
constexpr parser_interface token_spec{
token_parser<token_spec_t<Regex, ID, ValueType, Base>, detail::nope>()};
#ifndef BOOST_PARSER_DOXYGEN
// Extends this lexer with one more token spec.  The result is a new
// lexer_t type whose regex, ID list, and parse-spec list each have the
// new spec's entry appended; the argument's value is not used, only its
// type.
template<
    typename CharType,
    typename ID,
    ctll::fixed_string WsStr,
    ctll::fixed_string RegexStr,
    detail::nttp_array IDs,
    detail::nttp_array Specs>
template<
    ctll::fixed_string RegexStr2,
    auto ID2,
    typename ValueType,
    int Base>
constexpr auto
lexer_t<CharType, ID, WsStr, RegexStr, IDs, Specs>::operator|(
    parser_interface<token_parser<
        token_spec_t<RegexStr2, ID2, ValueType, Base>,
        detail::nope>> const &) const
{
    // Every spec added to a lexer must use the lexer's own ID type.
    static_assert(
        std::same_as<ID, decltype(ID2)>,
        "All id_types must be the same for all token_specs.");

    using added_spec = token_spec_t<RegexStr2, ID2, ValueType, Base>;

    constexpr auto extended_regex =
        detail::wrap_escape_concat<regex_str, RegexStr2>();
    constexpr auto extended_ids = IDs.template append<(int)ID2>();
    constexpr auto extended_specs =
        Specs.template append<detail::parse_spec_for<added_spec>()>();

    return lexer_t<
        CharType,
        ID,
        WsStr,
        extended_regex,
        extended_ids,
        extended_specs>{};
}
#endif
}}
#endif

View File

@@ -22,22 +22,21 @@ namespace boost::parser {
std::declval<
parse_context<false, false, I, S, default_error_handler>>(),
ws,
flags(uint32_t(flags::gen_attrs) | uint32_t(flags::use_skip)),
detail::default_flags(),
std::declval<bool &>()));
template<typename R, typename Parser>
using range_attr_t = attr_type<iterator_t<R>, sentinel_t<R>, Parser>;
#if BOOST_PARSER_USE_CONCEPTS
// clang-format off
template<typename F, typename V, typename Parser>
concept transform_replacement_for =
std::regular_invocable<F &, range_attr_t<V, Parser>> &&
detail::replacement_for<
std::invoke_result_t<F &, range_attr_t<V, Parser>>, V> &&
std::invoke_result_t<F &, range_attr_t<V, Parser>>,
V> &&
(detail::range_utf_format_v<V> ==
detail::range_utf_format_v<
std::invoke_result_t<F &, range_attr_t<V, Parser>>>);
// clang-format on
#else
template<typename F, typename V, typename Parser>
using transform_replacement_for_expr = decltype(std::declval<F &>()(
@@ -255,7 +254,9 @@ namespace boost::parser {
BOOST_PARSER_SUBRANGE(first, first), parse_result{});
}
if constexpr (std::is_same_v<SkipParser, eps_parser<phony>>) {
if constexpr (std::is_same_v<
SkipParser,
eps_parser<phony, parser_modifiers<>>>) {
auto result = parser::prefix_parse(
first, last, search_parser, trace_mode);
if (*result) {
@@ -579,7 +580,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
template<
typename V,
@@ -595,7 +596,7 @@ namespace boost::parser {
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>;
parser_interface<eps_parser<detail::phony, parser_modifiers<>>>>;
namespace detail {
template<
@@ -646,24 +647,20 @@ namespace boost::parser {
typename GlobalState,
typename ErrorHandler,
typename SkipParser>
requires
// clang-format off
std::ranges::viewable_range<R> &&
std::regular_invocable<
F &,
range_attr_t<to_range_t<R>, Parser>> &&
// clang-format on
can_transform_replace_view<
to_range_t<R>,
utf_rvalue_shim<
to_range_t<R>,
std::remove_cvref_t<F>,
range_attr_t<to_range_t<R>, Parser>>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
// clang-format off
requires std::ranges::viewable_range<R> &&
std::regular_invocable<
F &,
range_attr_t<to_range_t<R>, Parser>> &&
can_transform_replace_view<
to_range_t<R>,
utf_rvalue_shim<
to_range_t<R>,
std::remove_cvref_t<F>,
range_attr_t<to_range_t<R>, Parser>>,
Parser,
GlobalState,
ErrorHandler,
SkipParser>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
@@ -671,16 +668,15 @@ namespace boost::parser {
parser_interface<SkipParser> const & skip,
F && f,
trace trace_mode = trace::off) const
// clang-format on
{
return transform_replace_view(
to_range<R>::call((R &&) r),
to_range<R>::call((R &&)r),
parser,
skip,
utf_rvalue_shim<
to_range_t<R>,
std::remove_cvref_t<F>,
range_attr_t<to_range_t<R>, Parser>>((F &&) f),
range_attr_t<to_range_t<R>, Parser>>((F &&)f),
trace_mode);
}
@@ -690,37 +686,34 @@ namespace boost::parser {
typename Parser,
typename GlobalState,
typename ErrorHandler>
requires
// clang-format off
std::ranges::viewable_range<R> &&
std::regular_invocable<
F &,
range_attr_t<to_range_t<R>, Parser>> &&
// clang-format on
can_transform_replace_view<
to_range_t<R>,
utf_rvalue_shim<
to_range_t<R>,
std::remove_cvref_t<F>,
range_attr_t<to_range_t<R>, Parser>>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<eps_parser<detail::phony>>>
// clang-format off
requires std::ranges::viewable_range<R> &&
std::regular_invocable<
F &,
range_attr_t<to_range_t<R>, Parser>> &&
can_transform_replace_view<
to_range_t<R>,
utf_rvalue_shim<
to_range_t<R>,
std::remove_cvref_t<F>,
range_attr_t<to_range_t<R>, Parser>>,
Parser,
GlobalState,
ErrorHandler,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>>
[[nodiscard]] constexpr auto operator()(
R && r,
parser_interface<Parser, GlobalState, ErrorHandler> const &
parser,
F && f,
trace trace_mode = trace::off) const
// clang-format on
{
return (*this)(
(R &&) r,
(R &&)r,
parser,
parser_interface<eps_parser<detail::phony>>{},
(F &&) f,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
(F &&)f,
trace_mode);
}
@@ -764,10 +757,11 @@ namespace boost::parser {
std::is_same_v<Trace, trace>) {
// (r, parser, f, trace) case
return impl(
to_range<R>::call((R &&) r),
to_range<R>::call((R &&)r),
parser,
parser_interface<eps_parser<detail::phony>>{},
(SkipParser &&) skip,
parser_interface<
eps_parser<detail::phony, parser_modifiers<>>>{},
(SkipParser &&)skip,
f);
} else {
static_assert(

View File

@@ -6,23 +6,6 @@ enable_testing()
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j4 -C ${CMAKE_CFG_INTDIR})
if (CXX_STD GREATER_EQUAL 20)
include(FetchContent)
FetchContent_Declare(
ctre
URL https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/refs/heads/main/single-header/ctre-unicode.hpp
DOWNLOAD_NO_EXTRACT true
)
FetchContent_MakeAvailable(ctre)
set(ctre_include_dir ${CMAKE_BINARY_DIR}/_deps/ctre-src)
add_library(ctre_single_header INTERFACE)
target_include_directories(ctre_single_header INTERFACE ${ctre_include_dir})
else()
add_library(ctre_single_header INTERFACE)
endif()
##################################################
# Parser tests
##################################################
@@ -48,7 +31,6 @@ add_test(NAME parser_api COMMAND parser_api)
add_executable(
compile_tests
compile_include_lexer_parser.cpp
compile_tests_main.cpp
compile_attribute.cpp
compile_seq_attribute.cpp
@@ -57,12 +39,12 @@ add_executable(
compile_all_t.cpp
)
set_property(TARGET compile_tests PROPERTY CXX_STANDARD ${CXX_STD})
target_link_libraries(compile_tests parser boost ctre_single_header)
target_link_libraries(compile_tests parser boost)
macro(add_test_executable name)
add_executable(${name} ${name}.cpp)
set_property(TARGET ${name} PROPERTY CXX_STANDARD ${CXX_STD})
target_link_libraries(${name} parser boost ctre_single_header ${link_flags})
target_link_libraries(${name} parser boost ${link_flags})
if (MSVC)
target_compile_options(${name} PRIVATE /source-charset:utf-8 /bigobj)
elseif (USE_ASAN OR USE_UBSAN)
@@ -100,14 +82,6 @@ add_test_executable(parser_seq_permutations_1)
add_test_executable(parser_seq_permutations_2)
add_test_executable(parser_or_permutations_1)
add_test_executable(parser_or_permutations_2)
if (CXX_STD GREATER_EQUAL 20)
add_test_executable(lexer)
add_test_executable(lexer_adobe_files)
add_test_executable(lexer_and_parser)
add_test_executable(lexer_and_parser_api)
add_test_executable(lexer_and_parser_terminals)
add_test_executable(lexer_and_parser_symbol_table)
endif()
if (MSVC)
add_executable(vs_output_tracing tracing.cpp)

View File

@@ -1,76 +0,0 @@
/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#ifndef BOOST_PARSER_TEST_ADOBE_LEXER
#define BOOST_PARSER_TEST_ADOBE_LEXER
#include <boost/parser/parser.hpp>
#include <boost/parser/lexer.hpp>
namespace bp = boost::parser;
// Token IDs for the Adobe-style test lexer.  Each enumerator is the ID of
// the token_spec constant of the matching name declared in this header.
enum class adobe_tokens {
keyword_true_false,
keyword_empty,
identifier,
lead_comment,
trail_comment,
quoted_string,
number,
eq_op,
rel_op,
mul_op,
define,
or_,
and_
};
// Token specs for the Adobe-style test lexer.  Each pairs a regex with an
// adobe_tokens ID; a third template argument, where given, is the token's
// value type.

// The keywords "true" and "false"; value type is bool.
constexpr auto true_false =
bp::token_spec<"true|false", adobe_tokens::keyword_true_false, bool>;
// The keyword "empty".
constexpr auto empty = bp::token_spec<"empty", adobe_tokens::keyword_empty>;
// An ASCII letter followed by zero or more word characters.
constexpr auto identifier =
bp::token_spec<"[a-zA-Z]\\w*", adobe_tokens::identifier>;
// A block comment: slash-star, any text, star-slash.
constexpr auto lead_comment = bp::token_spec<
"\\/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*\\/",
adobe_tokens::lead_comment>;
// A line comment: two slashes, then everything up to the `$` anchor.
constexpr auto trail_comment =
bp::token_spec<"\\/\\/.*$", adobe_tokens::trail_comment>;
// A double-quoted or single-quoted string that does not contain its own
// quote character.
constexpr auto quoted_string =
bp::token_spec<"\\\"[^\\\"]*\\\"|'[^']*'", adobe_tokens::quoted_string>;
// One or more digits with an optional fractional part; value type is double.
constexpr auto number =
bp::token_spec<"\\d+(?:\\.\\d*)?", adobe_tokens::number, double>;
// Equality operators.
constexpr auto eq_op = bp::token_spec<"==|!=", adobe_tokens::eq_op>;
// The three-character operator "<==".
constexpr auto define = bp::token_spec<"<==", adobe_tokens::define>;
// Relational operators.
// NOTE(review): the one-character alternatives are listed before the
// two-character ones.  If the regex engine picks the first alternative
// that matches, "<=" and ">=" would never be produced as single tokens
// (they would lex as "<" or ">" followed by '=') -- confirm, and consider
// listing "<=" and ">=" first.
constexpr auto rel_op = bp::token_spec<"<|>|<=|>=", adobe_tokens::rel_op>;
// Multiplicative operators: star, slash, percent.
constexpr auto mul_op = bp::token_spec<"\\*|\\/|%", adobe_tokens::mul_op>;
// Logical-or operator (two vertical bars).
constexpr auto or_ = bp::token_spec<"\\|\\|", adobe_tokens::or_>;
// Logical-and operator.
constexpr auto and_ = bp::token_spec<"&&", adobe_tokens::and_>;
// The complete test lexer: a bp::lexer over `char` with adobe_tokens IDs,
// extended with each token spec above via operator|, and finally with a
// set of single-character tokens.
// NOTE(review): `define` ("<==") is added before `rel_op`, and `eq_op`
// before the single-character '='; presumably the order of the specs
// determines which one wins when several could match -- confirm against
// the lexer implementation before reordering.
constexpr auto adobe_lexer = bp::lexer<char, adobe_tokens> | true_false |
empty | identifier | lead_comment | trail_comment |
quoted_string | number | eq_op | define | rel_op |
mul_op | or_ | and_ |
bp::token_chars<
'=',
'+',
'-',
'!',
'?',
':',
'.',
',',
'(',
')',
'[',
']',
'{',
'}',
'@',
';'>;
#endif

View File

@@ -65,7 +65,7 @@ void compile_attribute_non_unicode()
using attr_t = decltype(parse(null_term(r), parser));
static_assert(std::is_same_v<attr_t, std::optional<char>>);
static_assert(std::is_same_v<
attribute_t<decltype(null_term(r)), decltype(parser)>,
attribute_t<decltype(r), decltype(parser)>,
char>);
}
{
@@ -73,7 +73,7 @@ void compile_attribute_non_unicode()
using attr_t = decltype(parse(null_term(r), parser));
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
static_assert(std::is_same_v<
attribute_t<decltype(null_term(r)), decltype(parser)>,
attribute_t<decltype(r), decltype(parser)>,
std::string>);
}
{
@@ -81,7 +81,7 @@ void compile_attribute_non_unicode()
using attr_t = decltype(parse(null_term(r), parser));
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
static_assert(std::is_same_v<
attribute_t<decltype(null_term(r)), decltype(parser)>,
attribute_t<decltype(r), decltype(parser)>,
std::string>);
}
{
@@ -89,7 +89,7 @@ void compile_attribute_non_unicode()
using attr_t = decltype(parse(null_term(r), parser));
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
static_assert(std::is_same_v<
attribute_t<decltype(null_term(r)), decltype(parser)>,
attribute_t<decltype(r), decltype(parser)>,
std::string>);
}
{
@@ -97,7 +97,7 @@ void compile_attribute_non_unicode()
using attr_t = decltype(parse(null_term(r), parser));
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
static_assert(std::is_same_v<
attribute_t<decltype(null_term(r)), decltype(parser)>,
attribute_t<decltype(r), decltype(parser)>,
std::string>);
}
}

View File

@@ -1,10 +0,0 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#include <boost/parser/config.hpp>
#if BOOST_PARSER_USE_CONCEPTS
#include <boost/parser/lexer.hpp>
#endif
#include <boost/parser/parser.hpp>

View File

@@ -1,569 +0,0 @@
/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#define BOOST_PARSER_TESTING
#include <boost/parser/lexer.hpp>
#include <boost/parser/parser.hpp>
#include <boost/parser/transcode_view.hpp>
#include "ill_formed.hpp"
#include <boost/core/lightweight_test.hpp>
#include <boost/container/small_vector.hpp>
#include <deque>
namespace bp = boost::parser;
// Token ids used by the lexers built in this test.  The tests cast these to
// int when constructing the expected tokens, so the enumerator order matters.
enum class my_tokens {
    ws,  // whitespace, only produced by the "ws-as-token" lexer
    foo, // the literal "foo"
    bar, // the literal "bar"
    baz  // the literal "baz"
};
/**
 * Test driver for the lexer facilities.
 *
 * Covers, in order: the types produced by bp::token_spec, the size and id
 * type of composed lexers, tokenizing through mutable/const views over
 * mutable/const inputs, tokenizing char/char8_t/char16_t/char32_t inputs,
 * lexers with whitespace skipping disabled, and the exceptions reported when
 * a matched token cannot be converted to the value type of its spec.
 *
 * Returns the result of boost::report_errors(), i.e. nonzero if any
 * BOOST_TEST check failed.
 */
int main()
{
    // formation of token_specs
    {
        // With only a pattern and an id, the value type is string_view_tag
        // and the base is 10.
        auto const token_spec = bp::token_spec<"foo", 12>;
        bp::token_spec_t<"foo", 12, bp::string_view_tag, 10>
            token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"foo", my_tokens::foo>;
        bp::token_spec_t<"foo", my_tokens::foo, bp::string_view_tag, 10>
            token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"bar", my_tokens::bar>;
        bp::token_spec_t<"bar", my_tokens::bar, bp::string_view_tag, 10>
            token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"foo", 12, int, 2>;
        bp::token_spec_t<"foo", 12, int, 2> token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        // Same check as the first case above; kept as in the original test.
        auto const token_spec = bp::token_spec<"foo", 12>;
        bp::token_spec_t<"foo", 12, bp::string_view_tag, 10>
            token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"foo", 12, unsigned int, 8>;
        bp::token_spec_t<"foo", 12, unsigned int, 8> token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        // An explicit value type without a base still defaults to base 10.
        auto const token_spec = bp::token_spec<"foo", 12, short>;
        bp::token_spec_t<"foo", 12, short, 10> token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"foo", 12, float>;
        bp::token_spec_t<"foo", 12, float, 10> token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }
    {
        auto const token_spec = bp::token_spec<"foo", 12, double>;
        bp::token_spec_t<"foo", 12, double, 10> token_spec_explicit;
        static_assert(std::same_as<
                      decltype(token_spec.parser_)::token_spec,
                      decltype(token_spec_explicit)>);
    }

    // making lexers
    {
        auto const lexer = bp::lexer<char, my_tokens> |
                           bp::token_spec<"foo", my_tokens::foo> |
                           bp::token_spec<"bar", my_tokens::bar> |
                           bp::token_spec<"baz", my_tokens::baz>;
        // +1 because of the 0-group
        static_assert(decltype(lexer)::size() == 3 + 1);
        static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
    }
    {
        auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<'='>;
        static_assert(decltype(lexer)::size() == 1 + 1);
        static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
    }
    {
        auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<'='> |
                           bp::token_spec<"foo", my_tokens::foo> |
                           bp::token_spec<"bar", my_tokens::bar> |
                           bp::token_spec<"baz", my_tokens::baz>;
        static_assert(decltype(lexer)::size() == 4 + 1);
        static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
    }
    {
        auto const lexer =
            bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
            bp::token_spec<"bar", my_tokens::bar> |
            bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>;
        static_assert(decltype(lexer)::size() == 4 + 1);
        static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
    }
    {
        // Each character given to token_chars counts as its own spec.
        auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<
                                                            '=',
                                                            '+',
                                                            '-',
                                                            '!',
                                                            '?',
                                                            ':',
                                                            '.',
                                                            ',',
                                                            '(',
                                                            ')',
                                                            '[',
                                                            ']',
                                                            '{',
                                                            '}',
                                                            '@',
                                                            ';'>;
        static_assert(decltype(lexer)::size() == 16 + 1);
        static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
    }

#if 0 // This is a test of whether the escapes work for every possible char
      // value accepted by detail::token_chars_spec.  This takes a long time and
      // really only needs to happen once.
    {
        auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<
                                                            char(0),
                                                            char(1),
                                                            char(2),
                                                            char(3),
                                                            char(4),
                                                            char(5),
                                                            char(6),
                                                            char(7),
                                                            char(8),
                                                            char(9),
                                                            char(10),
                                                            char(11),
                                                            char(12),
                                                            char(13),
                                                            char(14),
                                                            char(15),
                                                            char(16),
                                                            char(17),
                                                            char(18),
                                                            char(19),
                                                            char(20),
                                                            char(21),
                                                            char(22),
                                                            char(23),
                                                            char(24),
                                                            char(25),
                                                            char(26),
                                                            char(27),
                                                            char(28),
                                                            char(29),
                                                            char(30),
                                                            char(31),
                                                            char(32),
                                                            char(33),
                                                            char(34),
                                                            char(35),
                                                            char(36),
                                                            char(37),
                                                            char(38),
                                                            char(39),
                                                            char(40),
                                                            char(41),
                                                            char(42),
                                                            char(43),
                                                            char(44),
                                                            char(45),
                                                            char(46),
                                                            char(47),
                                                            char(48),
                                                            char(49),
                                                            char(50),
                                                            char(51),
                                                            char(52),
                                                            char(53),
                                                            char(54),
                                                            char(55),
                                                            char(56),
                                                            char(57),
                                                            char(58),
                                                            char(59),
                                                            char(60),
                                                            char(61),
                                                            char(62),
                                                            char(63),
                                                            char(64),
                                                            char(65),
                                                            char(66),
                                                            char(67),
                                                            char(68),
                                                            char(69),
                                                            char(70),
                                                            char(71),
                                                            char(72),
                                                            char(73),
                                                            char(74),
                                                            char(75),
                                                            char(76),
                                                            char(77),
                                                            char(78),
                                                            char(79),
                                                            char(80),
                                                            char(81),
                                                            char(82),
                                                            char(83),
                                                            char(84),
                                                            char(85),
                                                            char(86),
                                                            char(87),
                                                            char(88),
                                                            char(89),
                                                            char(90),
                                                            char(91),
                                                            char(92),
                                                            char(93),
                                                            char(94),
                                                            char(95),
                                                            char(96),
                                                            char(97),
                                                            char(98),
                                                            char(99),
                                                            char(100),
                                                            char(101),
                                                            char(102),
                                                            char(103),
                                                            char(104),
                                                            char(105),
                                                            char(106),
                                                            char(107),
                                                            char(108),
                                                            char(109),
                                                            char(110),
                                                            char(111),
                                                            char(112),
                                                            char(113),
                                                            char(114),
                                                            char(115),
                                                            char(116),
                                                            char(117),
                                                            char(118),
                                                            char(119),
                                                            char(120),
                                                            char(121),
                                                            char(122),
                                                            char(123),
                                                            char(124),
                                                            char(125),
                                                            char(126),
                                                            char(127)>;
    }
#endif

    {
        // Mixed UTFs.
        auto const lexer =
            bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
            bp::token_spec<u"bar", my_tokens::bar> |
            bp::token_spec<U"baz", my_tokens::baz> | bp::token_chars<'='>;

        // mutable vs. const token_views + mutable vs. const input views
        std::string input = "foo = bar";
        auto mr_mi = input | bp::to_tokens(lexer);
        auto const cr_mi = input | bp::to_tokens(lexer);

        // The *_ci views must be built from the const copy; building them
        // from `input` would merely repeat the two mutable-input cases.
        auto const const_input = input;
        auto mr_ci = const_input | bp::to_tokens(lexer);
        auto const cr_ci = const_input | bp::to_tokens(lexer);

        using tok_t = bp::token<char>;
        // Whitespace in the input does not show up as tokens here.
        tok_t const expected[] = {
            tok_t((int)my_tokens::foo, 0, "foo"),
            tok_t(bp::character_id, 0, (long long)'='),
            tok_t((int)my_tokens::bar, 0, "bar")};

        int position = 0;
        for (auto tok : mr_mi) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));

        position = 0;
        for (auto tok : cr_mi) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));

        position = 0;
        for (auto tok : mr_ci) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));

        position = 0;
        for (auto tok : cr_ci) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));
    }

    // Check basic plumbing of connecting UTF inputs to CTRE.
    {
        auto const lexer =
            bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
            bp::token_spec<"bar", my_tokens::bar> |
            bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>;
        std::string s = "foo = bar";
        using tok_t = bp::token<char>;
        tok_t const expected[] = {
            tok_t((int)my_tokens::foo, 0, "foo"),
            tok_t(bp::character_id, 0, (long long)'='),
            tok_t((int)my_tokens::bar, 0, "bar")};

        auto const lexer8 = bp::lexer<char8_t, my_tokens> |
                            bp::token_spec<"foo", my_tokens::foo> |
                            bp::token_spec<"bar", my_tokens::bar> |
                            bp::token_spec<"baz", my_tokens::baz> |
                            bp::token_chars<'='>;
        std::u8string u8s = u8"foo = bar";
        using tok8_t = bp::token<char8_t>;
        tok8_t const expected8[] = {
            tok8_t((int)my_tokens::foo, 0, u8"foo"),
            tok8_t(bp::character_id, 0, (long long)'='),
            tok8_t((int)my_tokens::bar, 0, u8"bar")};

        auto const lexer16 = bp::lexer<char16_t, my_tokens> |
                             bp::token_spec<"foo", my_tokens::foo> |
                             bp::token_spec<"bar", my_tokens::bar> |
                             bp::token_spec<"baz", my_tokens::baz> |
                             bp::token_chars<'='>;
        std::u16string u16s = u"foo = bar";
        using tok16_t = bp::token<char16_t>;
        tok16_t const expected16[] = {
            tok16_t((int)my_tokens::foo, 0, u"foo"),
            tok16_t(bp::character_id, 0, (long long)'='),
            tok16_t((int)my_tokens::bar, 0, u"bar")};

        auto const lexer32 = bp::lexer<char32_t, my_tokens> |
                             bp::token_spec<"foo", my_tokens::foo> |
                             bp::token_spec<"bar", my_tokens::bar> |
                             bp::token_spec<"baz", my_tokens::baz> |
                             bp::token_chars<'='>;
        std::u32string u32s = U"foo = bar";
        using tok32_t = bp::token<char32_t>;
        tok32_t const expected32[] = {
            tok32_t((int)my_tokens::foo, 0, U"foo"),
            tok32_t(bp::character_id, 0, (long long)'='),
            tok32_t((int)my_tokens::bar, 0, U"bar")};

        // Each loop also checks that the token's string view type follows
        // the character type of the lexer.
        int position = 0;
        for (auto tok : s | bp::to_tokens(lexer)) {
            BOOST_TEST(tok == expected[position]);
            static_assert(std::same_as<
                          decltype(tok.get_string_view()),
                          std::string_view>);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));

        // The count checks below compare against the array that matches the
        // loop (expected8/16/32) rather than always against `expected`.
        position = 0;
        for (auto tok : u8s | bp::to_tokens(lexer8)) {
            BOOST_TEST(tok == expected8[position]);
            static_assert(std::same_as<
                          decltype(tok.get_string_view()),
                          std::u8string_view>);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected8));

        position = 0;
        for (auto tok : u16s | bp::to_tokens(lexer16)) {
            BOOST_TEST(tok == expected16[position]);
            static_assert(std::same_as<
                          decltype(tok.get_string_view()),
                          std::u16string_view>);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected16));

        position = 0;
        for (auto tok : u32s | bp::to_tokens(lexer32)) {
            BOOST_TEST(tok == expected32[position]);
            static_assert(std::same_as<
                          decltype(tok.get_string_view()),
                          std::u32string_view>);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected32));
    }

    // no-ws lexer
    {
        auto const lexer = bp::lexer<char, my_tokens, bp::no_ws> |
                           bp::token_spec<"foo", my_tokens::foo> |
                           bp::token_spec<"bar", my_tokens::bar> |
                           bp::token_spec<"baz", my_tokens::baz> |
                           bp::token_chars<'='>;
        // No whitespace in the input, since this lexer has no spec for it.
        std::string s = "foo=bar";
        using tok_t = bp::token<char>;
        tok_t const expected[] = {
            tok_t((int)my_tokens::foo, 0, "foo"),
            tok_t(bp::character_id, 0, (long long)'='),
            tok_t((int)my_tokens::bar, 0, "bar")};

        int position = 0;
        for (auto tok : s | bp::to_tokens(lexer)) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));
    }

    // ws-as-token lexers
    {
        auto const lexer = bp::lexer<char, my_tokens, bp::no_ws> |
                           bp::token_spec<"\\s+", my_tokens::ws> |
                           bp::token_spec<"foo", my_tokens::foo> |
                           bp::token_spec<"bar", my_tokens::bar> |
                           bp::token_spec<"baz", my_tokens::baz> |
                           bp::token_chars<'='>;
        std::string s = "foo = bar";
        using tok_t = bp::token<char>;
        // With an explicit "\\s+" spec the spaces are reported as ws tokens.
        tok_t const expected[] = {
            tok_t((int)my_tokens::foo, 0, "foo"),
            tok_t((int)my_tokens::ws, 0, " "),
            tok_t(bp::character_id, 0, (long long)'='),
            tok_t((int)my_tokens::ws, 0, " "),
            tok_t((int)my_tokens::bar, 0, "bar")};

        int position = 0;
        for (auto tok : s | bp::to_tokens(lexer)) {
            BOOST_TEST(tok == expected[position]);
            ++position;
        }
        BOOST_TEST(position == (int)std::size(expected));
    }

    // lexing errors
    {
        using namespace std::literals;

        // Every spec matches plain letters but declares a numeric value
        // type, so each input below is expected to throw while lexing; the
        // message names the value type that could not be produced.
        auto const lexer = bp::lexer<char, int> |
                           bp::token_spec<"foo", 0, float> |
                           bp::token_spec<"bar", 1, int> |
                           bp::token_spec<"baz", 2, unsigned short> |
                           bp::token_spec<"quux", 3, int, 8> |
                           bp::token_spec<"next", 4, unsigned long long, 16>;

        bool caught_exception = false;
        try {
            for (auto tok : "foo" | bp::to_tokens(lexer)) {
                (void)tok;
            }
        } catch (std::exception const & e) {
            BOOST_TEST(e.what() == "32-bit floating-point number"sv);
            caught_exception = true;
        }
        BOOST_TEST(caught_exception);

        caught_exception = false;
        try {
            for (auto tok : "bar" | bp::to_tokens(lexer)) {
                (void)tok;
            }
        } catch (std::exception const & e) {
            BOOST_TEST(e.what() == "32-bit signed integer"sv);
            caught_exception = true;
        }
        BOOST_TEST(caught_exception);

        caught_exception = false;
        try {
            for (auto tok : "baz" | bp::to_tokens(lexer)) {
                (void)tok;
            }
        } catch (std::exception const & e) {
            BOOST_TEST(e.what() == "16-bit unsigned integer"sv);
            caught_exception = true;
        }
        BOOST_TEST(caught_exception);

        caught_exception = false;
        try {
            for (auto tok : "quux" | bp::to_tokens(lexer)) {
                (void)tok;
            }
        } catch (std::exception const & e) {
            BOOST_TEST(e.what() == "32-bit, base-8 signed integer"sv);
            caught_exception = true;
        }
        BOOST_TEST(caught_exception);

        caught_exception = false;
        try {
            for (auto tok : "next" | bp::to_tokens(lexer)) {
                (void)tok;
            }
        } catch (std::exception const & e) {
            BOOST_TEST(e.what() == "64-bit, base-16 unsigned integer"sv);
            caught_exception = true;
        }
        BOOST_TEST(caught_exception);
    }

    return boost::report_errors();
}

View File

@@ -1,828 +0,0 @@
/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#define BOOST_PARSER_TESTING
#include <boost/parser/lexer.hpp>
#include <boost/parser/transcode_view.hpp>
#include "ill_formed.hpp"
#include "adobe_lexer.hpp"
#include <boost/core/lightweight_test.hpp>
#include <boost/container/small_vector.hpp>
#include <deque>
namespace bp = boost::parser;
int main()
{
{
static_assert(decltype(adobe_lexer)::size() == 29 + 1);
static_assert(
std::same_as<decltype(adobe_lexer)::id_type, adobe_tokens>);
// tokens_view from adobe_lexer
{
char const input[] = R"(/*
Copyright 2005-2007 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html)
*/
sheet alert_dialog
{
output:
result <== { dummy_value: 42 };
})";
// first, just make a ctre range
{
std::string_view const expected[] = {
R"(/*
Copyright 2005-2007 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html)
*/)",
R"(
)", R"(sheet)", R"( )", R"(alert_dialog)",
R"(
)", R"({)",
R"(
)", R"(output)", R"(:)",
R"(
)", R"(result)", R"( )", R"(<==)",
R"( )", R"({)", R"( )", R"(dummy_value)",
R"(:)", R"( )", R"(42)", R"( )",
R"(})", R"(;)",
R"(
)", R"(})"};
auto r = adobe_lexer.regex_range(input);
int position = 0;
for (auto subrange : r) {
std::string_view sv = subrange;
BOOST_TEST(sv == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
std::cout << "\n";
}
using tok_t = bp::token<char>;
tok_t const expected[] = {
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
Copyright 2005-2007 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html)
*/)"),
tok_t((int)adobe_tokens::identifier, 0, "sheet"),
tok_t((int)adobe_tokens::identifier, 0, "alert_dialog"),
tok_t(bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "output"),
tok_t(bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "result"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "dummy_value"),
tok_t(bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)42.0),
tok_t(bp::character_id, 0, (long long)'}'),
tok_t(bp::character_id, 0, (long long)';'),
tok_t(bp::character_id, 0, (long long)'}')};
// make a tokens_view
{
auto r = bp::tokens_view(input, adobe_lexer);
int position = 0;
for (auto tok : r) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
// to_tokens range adaptor
{
int position = 0;
for (auto tok : bp::to_tokens(input, adobe_lexer)) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
{
std::string const input_str = input;
int position = 0;
for (auto tok : bp::to_tokens(input_str, adobe_lexer)) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
{
int position = 0;
for (auto tok :
std::string(input) | bp::to_tokens(adobe_lexer)) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
// using external caches
{
std::vector<bp::token<char>> cache;
int position = 0;
for (auto tok :
bp::to_tokens(input, adobe_lexer, std::ref(cache))) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
{
boost::container::small_vector<bp::token<char>, 10> cache;
int position = 0;
for (auto tok :
input | bp::to_tokens(adobe_lexer, std::ref(cache))) {
BOOST_TEST(tok == expected[position]);
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
{
char const large_input[] = R"(/*
Copyright 2005-2007 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html)
*/
sheet image_size
{
input:
original_width : 1600;
original_height : 1200;
original_resolution : 300;
constant:
original_doc_width : original_width / original_resolution;
original_doc_height : original_height / original_resolution;
interface:
resample : true;
unlink constrain : true <== resample ? constrain : true;
unlink scale_styles : true <== resample && constrain ? scale_styles : false;
resample_method : @bicubic;
dim_width_pixels : original_width <== resample ? round(dim_width_pixels) : original_width;
dim_width_percent : 100 <== resample ? dim_width_percent : 100;
dim_height_pixels : original_height <== resample ? round(dim_height_pixels) : original_height;
dim_height_percent : 100 <== resample ? dim_height_percent : 100;
doc_width_inches : original_doc_width;
doc_width_percent : 100;
/*
Resolution must be initialized before width and height inches to allow proportions
to be constrained.
*/
doc_resolution : original_resolution;
doc_height_inches : original_doc_height;
doc_height_percent : 100;
auto_quality : @draft;
screen_lpi; // initialized from doc_resolution
logic:
relate {
doc_width_inches <== doc_width_percent * original_doc_width / 100;
doc_width_percent <== doc_width_inches * 100 / original_doc_width;
}
relate {
doc_height_inches <== doc_height_percent * original_doc_height / 100;
doc_height_percent <== doc_height_inches * 100 / original_doc_height;
}
relate {
screen_lpi <== doc_resolution / (auto_quality == @draft ? 1 : (auto_quality == @good ? 1.5 : 2.0));
doc_resolution <== screen_lpi * (auto_quality == @draft ? 1 : (auto_quality == @good ? 1.5 : 2.0));
}
when (resample) relate {
dim_width_pixels <== dim_width_percent * original_width / 100;
dim_width_percent <== dim_width_pixels * 100 / original_width;
}
when (resample) relate {
dim_height_pixels <== dim_height_percent * original_height / 100;
dim_height_percent <== dim_height_pixels * 100 / original_height;
}
when (resample) relate {
doc_width_inches <== dim_width_pixels / doc_resolution;
dim_width_pixels <== doc_width_inches * doc_resolution;
doc_resolution <== dim_width_pixels / doc_width_inches;
}
when (resample) relate {
doc_height_inches <== dim_height_pixels / doc_resolution;
dim_height_pixels <== doc_height_inches * doc_resolution;
doc_resolution <== dim_height_pixels / doc_height_inches;
}
when (!resample) relate {
doc_resolution <== original_width / doc_width_inches;
doc_width_inches <== original_width / doc_resolution;
}
when (!resample) relate {
doc_resolution <== original_height / doc_height_inches;
doc_height_inches <== original_height / doc_resolution;
}
when (constrain && resample) relate {
dim_width_percent <== dim_height_percent;
dim_height_percent <== dim_width_percent;
}
output:
byte_count <== dim_width_pixels * dim_height_pixels * 32;
result <== resample ? {
command: @resize_image,
width: dim_width_pixels,
height: dim_height_pixels,
resolution: doc_resolution,
scale_styles: scale_styles,
resample_method: resample_method
} : {
command: @set_resolution,
resolution: doc_resolution
};
invariant:
width_max <== dim_width_pixels <= 300000;
height_max <== dim_height_pixels <= 300000;
}
)";
tok_t const expected[] = {
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
Copyright 2005-2007 Adobe Systems Incorporated
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
or a copy at http://stlab.adobe.com/licenses.html)
*/)"),
tok_t((int)adobe_tokens::identifier, 0, "sheet"),
tok_t((int)adobe_tokens::identifier, 0, "image_size"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "input"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)1600.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)1200.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_resolution"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)300.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "constant"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t(
(int)adobe_tokens::identifier, 0, "original_doc_width"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_doc_height"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "interface"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "unlink"),
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "unlink"),
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)adobe_tokens::and_, 0, "&&"),
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::keyword_true_false, 0, 0ll),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "bicubic"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::identifier, 0, "round"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::identifier, 0, "round"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t(
(int)adobe_tokens::identifier, 0, "original_doc_width"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
Resolution must be initialized before width and height inches to allow proportions
to be constrained.
*/)"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_doc_height"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "draft"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::trail_comment,
0,
"// initialized from doc_resolution"),
tok_t((int)adobe_tokens::identifier, 0, "logic"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t(
(int)adobe_tokens::identifier, 0, "original_doc_width"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier, 0, "original_doc_width"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_doc_height"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier,
0,
"original_doc_height"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
tok_t((int)adobe_tokens::eq_op, 0, "=="),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "draft"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::number, 0, (long double)1.0),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
tok_t((int)adobe_tokens::eq_op, 0, "=="),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "good"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::number, 0, (long double)1.5),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)2.0),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
tok_t((int)adobe_tokens::eq_op, 0, "=="),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "draft"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::number, 0, (long double)1.0),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
tok_t((int)adobe_tokens::eq_op, 0, "=="),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "good"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)adobe_tokens::number, 0, (long double)1.5),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::number, 0, (long double)2.0),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)bp::character_id, 0, (long long)'!'),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)bp::character_id, 0, (long long)'!'),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
tok_t((int)adobe_tokens::mul_op, 0, "/"),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "when"),
tok_t((int)bp::character_id, 0, (long long)'('),
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
tok_t((int)adobe_tokens::and_, 0, "&&"),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)')'),
tok_t((int)adobe_tokens::identifier, 0, "relate"),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)adobe_tokens::identifier, 0, "output"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "byte_count"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::mul_op, 0, "*"),
tok_t((int)adobe_tokens::number, 0, (long double)32.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "result"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "resample"),
tok_t((int)bp::character_id, 0, (long long)'?'),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "command"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "resize_image"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "width"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "height"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "resolution"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'{'),
tok_t((int)adobe_tokens::identifier, 0, "command"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)bp::character_id, 0, (long long)'@'),
tok_t((int)adobe_tokens::identifier, 0, "set_resolution"),
tok_t((int)bp::character_id, 0, (long long)','),
tok_t((int)adobe_tokens::identifier, 0, "resolution"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
tok_t((int)bp::character_id, 0, (long long)'}'),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "invariant"),
tok_t((int)bp::character_id, 0, (long long)':'),
tok_t((int)adobe_tokens::identifier, 0, "width_max"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
tok_t((int)adobe_tokens::rel_op, 0, "<"),
tok_t((int)bp::character_id, 0, (long long)'='),
tok_t((int)adobe_tokens::number, 0, (long double)300000.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)adobe_tokens::identifier, 0, "height_max"),
tok_t((int)adobe_tokens::define, 0, "<=="),
tok_t(
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
tok_t((int)adobe_tokens::rel_op, 0, "<"),
tok_t((int)bp::character_id, 0, (long long)'='),
tok_t((int)adobe_tokens::number, 0, (long double)300000.0),
tok_t((int)bp::character_id, 0, (long long)';'),
tok_t((int)bp::character_id, 0, (long long)'}')};
int position = 0;
for (auto tok :
std::string(large_input) | bp::to_tokens(adobe_lexer)) {
BOOST_TEST(tok == expected[position]);
if (tok != expected[position]) {
std::cout << "At pos=" << position << ": got " << tok
<< " expected " << expected[position] << "\n";
}
++position;
}
BOOST_TEST(position == (int)std::size(expected));
}
}
}
return boost::report_errors();
}

View File

@@ -1,237 +0,0 @@
/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#define BOOST_PARSER_TESTING
//[ tokens_basics_headers
#include <boost/parser/lexer.hpp>
#include <boost/parser/parser.hpp>
//]
#include <boost/core/lightweight_test.hpp>
#include "adobe_lexer.hpp"
namespace bp = boost::parser;
// Test driver for the basic lexer/token-parser integration.
//
// Each brace-delimited scope below is an independent test case.  The cases
// between the "clang-format off/on" markers double as documentation examples:
// the "//[ name" ... "//]" comment pairs delimit snippets and must be kept
// verbatim.
//
// Returns the value of boost::report_errors(), i.e. the BOOST_TEST failure
// status.
int main()
{
    // Minimal test; just instantiate the member functions, without involving
    // the parse() API.
    {
        bp::token<char> tokens[1] = {};
        auto p = bp::token_spec<"12", 12, int>;
        auto first = std::begin(tokens);
        auto const last = std::end(tokens);

        bp::detail::nope globals;
        bp::default_error_handler error_handler;

        // From parse_impl().
        bool success = true;
        int trace_indent = 0;
        bp::detail::symbol_table_tries_t symbol_table_tries;
        bp::detail::pending_symbol_table_operations_t
            pending_symbol_table_operations;
        bp::detail::scoped_apply_pending_symbol_table_operations apply_pending(
            pending_symbol_table_operations);
        auto context = bp::detail::make_context<false, false>(
            first,
            last,
            success,
            trace_indent,
            error_handler,
            globals,
            symbol_table_tries,
            pending_symbol_table_operations);
        auto const flags = bp::detail::flags::gen_attrs;

        // Only the instantiation matters here; the parse result is ignored.
        std::optional<int> result =
            p(first, last, context, bp::ws, flags, success);
        (void)result;
    }

    // Minimal tests of building parsers from token_parser and token_spec.
    {
        auto parser1 = true_false(true);
        auto parser2 = true_false(false);
        (void)parser1;
        (void)parser2;
    }
    {
        auto parser = identifier("foo") >> '=' >> true_false >> ';';
        (void)parser;
    }

    // Minimal tests of using a lexer and parser together.
    {
        auto parser = identifier("foo") >> '=' >> true_false >> ';';
        auto r = "some input" | bp::to_tokens(adobe_lexer);
        auto result = bp::parse(r, parser);
        BOOST_TEST(!result);

        // tokens_view_or_nope() must not yield nope for a tokens view, whether
        // it is passed as a mutable or as a const lvalue.
        static_assert(!std::same_as<
                      std::remove_cvref_t<
                          decltype(bp::detail::tokens_view_or_nope(r))>,
                      bp::detail::nope>);

        auto const & cr = r;
        static_assert(!std::same_as<
                      std::remove_cvref_t<
                          decltype(bp::detail::tokens_view_or_nope(cr))>,
                      bp::detail::nope>);
    }
    {
        auto parser = identifier >> '=' >> true_false >> ';';
        auto r = "foo = false;" | bp::to_tokens(adobe_lexer);
        auto result = bp::parse(r, parser);
        BOOST_TEST(result);
        BOOST_TEST(std::get<0>(*result) == "foo");
        BOOST_TEST(std::get<1>(*result) == false);
    }

    // Test the use of an external token cache.
    {
        auto parser = identifier >> '=' >> true_false >> ';';
        std::vector<bp::token<char>> cache;
        auto r = "foo = false;" | bp::to_tokens(adobe_lexer, std::ref(cache));
        auto result = bp::parse(r, parser);
        // Report a failed parse explicitly before looking inside the optional,
        // as the other test cases in this file do.
        BOOST_TEST(result);
        BOOST_TEST(std::get<0>(*result) == "foo");
        BOOST_TEST(std::get<1>(*result) == false);
        BOOST_TEST(cache.size() == 4u);
    }

    // Test the clearing of the token cache at expectation points.
    {
        // Same grammar as above, except for the expectation operator (>)
        // before true_false; the cache is expected to end up holding 2 tokens
        // instead of 4.
        auto parser = identifier >> '=' > true_false >> ';';
        std::vector<bp::token<char>> cache;
        auto r = "foo = false;" | bp::to_tokens(adobe_lexer, std::ref(cache));
        auto result = bp::parse(r, parser);
        // Report a failed parse explicitly before looking inside the optional.
        BOOST_TEST(result);
        BOOST_TEST(std::get<0>(*result) == "foo");
        BOOST_TEST(std::get<1>(*result) == false);
        BOOST_TEST(cache.size() == 2u);
    }

    // doc examples
    // clang-format off
    {
        //[ tokens_basics_lexer
        auto const foo = bp::token_spec<"foo", 0>;
        auto const bar = bp::token_spec<"b.r", 1>;
        auto const baz = bp::token_spec<"b.z", 2>;

        auto const lexer = bp::lexer<char, int> | foo | bar | baz;
        //]

        //[ tokens_basics_input_range
        auto r = "foobazbar" | bp::to_tokens(lexer);
        //]

        //[ tokens_basics_parser
        auto parser = foo >> baz >> bar;
        //]

        //[ tokens_basics_parse
        auto result = bp::parse(r, parser);
        assert(result);
        assert(std::get<0>(*result) == "foo");
        assert(std::get<1>(*result) == "baz");
        assert(std::get<2>(*result) == "bar");
        //]
    }

    {
        //[ tokens_attrs
        constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
        constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
        constexpr auto number = bp::token_spec<"\\d+(?:\\.\\d*)?", 2, double>;
        //]
        (void)true_false;
        (void)identifier;
        (void)number;
    }

    {
        //[ tokens_token_char
        constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
        constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;

        constexpr auto lexer =
            bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;

        auto parser = identifier >> '=' >> true_false >> ';';
        auto r = "foo = false;" | bp::to_tokens(lexer);
        auto result = bp::parse(r, parser);
        assert(result);
        assert(std::get<0>(*result) == "foo");
        assert(std::get<1>(*result) == false);
        //]
    }

    {
        //[ tokens_caching_simple
        constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
        constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;

        constexpr auto lexer =
            bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;

        auto parser = identifier >> '=' >> true_false >> ';';
        std::vector<bp::token<char>> cache;
        auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache));
        auto result = bp::parse(r, parser);
        assert(result);
        assert(std::get<0>(*result) == "foo");
        assert(std::get<1>(*result) == false);
        assert(cache.size() == 4u);
        //]
    }

    {
        constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
        constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;

        constexpr auto lexer =
            bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;

        //[ tokens_caching_expectation_point
        auto parser = identifier >> '=' > true_false >> ';';
        std::vector<bp::token<char>> cache;
        auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache));
        auto result = bp::parse(r, parser);
        assert(result);
        assert(std::get<0>(*result) == "foo");
        assert(std::get<1>(*result) == false);
        assert(cache.size() == 2u);
        //]
    }

    {
        //[ tokens_string_in_character_vs_token_parsing
        constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
        constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;

        constexpr auto lexer =
            bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;

        auto parser = bp::string("=;");

        // NOTE: Character parsing here.
        auto character_parse_result = bp::parse("=;", parser);
        assert(character_parse_result);
        assert(*character_parse_result == "=;");

        // NOTE: Token parsing here.
        auto token_parse_result = bp::parse("=;" | bp::to_tokens(lexer), parser);
        assert(!token_parse_result);
        //]
    }
    // clang-format on

    return boost::report_errors();
}

View File

@@ -1,184 +0,0 @@
/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#define BOOST_PARSER_TESTING
#include <boost/parser/lexer.hpp>
#include <boost/parser/parser.hpp>
#include <boost/core/lightweight_test.hpp>
namespace bp = boost::parser;
// Token specs shared by all tests in this file.  The template arguments
// appear to be (regex, token ID[, attribute type]); true_false requests a
// bool attribute, and the tests below read identifier's attribute as a
// string view.
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
// Tag types.  Each one identifies a callback rule and selects the matching
// callbacks::operator() overload.
struct tf_tag
{};
struct id_tag
{};
// Callback rules wrapping the two token specs; the attribute types (bool and
// std::string_view) are what the callbacks receive.
// NOTE(review): the "" initializer is presumably the rule's diagnostic name,
// left empty here -- confirm against the Boost.Parser rule documentation.
constexpr bp::callback_rule<tf_tag, bool> callback_true_false = "";
constexpr bp::callback_rule<id_tag, std::string_view> callback_identifier = "";
// Rule definitions: each rule simply forwards to the corresponding token spec.
constexpr auto callback_true_false_def = true_false;
constexpr auto callback_identifier_def = identifier;
BOOST_PARSER_DEFINE_RULES(callback_true_false, callback_identifier);
// Callback object handed to the callback_*parse() calls in main().  It holds
// references to caller-owned variables and stores each reported attribute
// into the one selected by the tag argument.
struct callbacks
{
    // Destinations for the reported attributes.  The declaration order is
    // significant: main() aggregate-initializes this type as callbacks{sv, b}.
    std::string_view & sv_;
    bool & b_;

    // tf_tag: remember the parsed boolean.
    void operator()(tf_tag, bool value) const { b_ = value; }

    // id_tag: remember the parsed identifier text.
    void operator()(id_tag, std::string_view text) const { sv_ = text; }
};
int main()
{
auto assign_bool_parser = identifier >> '=' >> true_false >> ';';
auto assign_bool_no_semi_parser = identifier >> '=' >> true_false;
constexpr auto lexer = bp::lexer<char, int> | true_false | identifier |
bp::token_chars<'=', ';'>;
auto r = "foo = false;" | bp::to_tokens(lexer);
// prefix_parse() w/attr
{
auto f = r.begin();
auto const l = r.end();
std::tuple<std::string_view, bool> result;
auto success = bp::prefix_parse(f, l, assign_bool_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == "foo");
BOOST_TEST(std::get<1>(result) == false);
}
{
auto f = r.begin();
auto const l = r.end();
std::tuple<std::string_view, bool> result;
auto success = bp::prefix_parse(f, l, assign_bool_no_semi_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == "foo");
BOOST_TEST(std::get<1>(result) == false);
BOOST_TEST(f != l);
}
// parse() w/attr
{
std::tuple<std::string_view, bool> result;
auto success = bp::parse(r, assign_bool_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == "foo");
BOOST_TEST(std::get<1>(result) == false);
}
{
constexpr auto lexer = bp::lexer<char8_t, int> | true_false |
identifier | bp::token_chars<'=', ';'>;
auto r8 = u8"foo = false;" | bp::to_tokens(lexer);
std::tuple<std::u8string_view, bool> result;
auto success = bp::parse(r8, assign_bool_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == u8"foo");
BOOST_TEST(std::get<1>(result) == false);
}
{
constexpr auto lexer = bp::lexer<char16_t, int> | true_false |
identifier | bp::token_chars<'=', ';'>;
auto r16 = u"foo = false;" | bp::to_tokens(lexer);
std::tuple<std::u16string_view, bool> result;
auto success = bp::parse(r16, assign_bool_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == u"foo");
BOOST_TEST(std::get<1>(result) == false);
}
{
constexpr auto lexer = bp::lexer<char32_t, int> | true_false |
identifier | bp::token_chars<'=', ';'>;
auto r32 = U"foo = false;" | bp::to_tokens(lexer);
std::tuple<std::u32string_view, bool> result;
auto success = bp::parse(r32, assign_bool_parser, result);
BOOST_TEST(success);
BOOST_TEST(std::get<0>(result) == U"foo");
BOOST_TEST(std::get<1>(result) == false);
}
// prefix_parse() no attr
{
auto f = r.begin();
auto const l = r.end();
auto result = bp::prefix_parse(f, l, assign_bool_parser);
BOOST_TEST(result);
BOOST_TEST(std::get<0>(*result) == "foo");
BOOST_TEST(std::get<1>(*result) == false);
}
{
auto f = r.begin();
auto const l = r.end();
auto result = bp::prefix_parse(f, l, assign_bool_no_semi_parser);
BOOST_TEST(result);
BOOST_TEST(std::get<0>(*result) == "foo");
BOOST_TEST(std::get<1>(*result) == false);
BOOST_TEST(f != l);
}
// parse() no attr
{
auto result = bp::parse(r, assign_bool_parser);
BOOST_TEST(result);
BOOST_TEST(std::get<0>(*result) == "foo");
BOOST_TEST(std::get<1>(*result) == false);
}
// callback_prefix_parse()
{
auto assign_bool_parser =
callback_identifier >> '=' >> callback_true_false >> ';';
auto f = r.begin();
auto const l = r.end();
std::string_view sv;
bool b = false;
auto success = bp::callback_prefix_parse(
f, l, assign_bool_parser, callbacks{sv, b});
BOOST_TEST(success);
BOOST_TEST(sv == "foo");
BOOST_TEST(b == false);
}
{
auto assign_bool_no_semi_parser =
callback_identifier >> '=' >> callback_true_false;
auto f = r.begin();
auto const l = r.end();
std::string_view sv;
bool b = false;
auto success = bp::callback_prefix_parse(
f, l, assign_bool_no_semi_parser, callbacks{sv, b});
BOOST_TEST(success);
BOOST_TEST(sv == "foo");
BOOST_TEST(b == false);
BOOST_TEST(f != l);
}
// callback_parse()
{
auto assign_bool_parser =
callback_identifier >> '=' >> callback_true_false >> ';';
std::string_view sv;
bool b = false;
auto success =
bp::callback_parse(r, assign_bool_parser, callbacks{sv, b});
BOOST_TEST(success);
BOOST_TEST(sv == "foo");
BOOST_TEST(b == false);
}
return boost::report_errors();
}

View File

@@ -1,112 +0,0 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_PARSER_TESTING
#include <boost/parser/lexer.hpp>
#include <boost/parser/parser.hpp>
#include <boost/core/lightweight_test.hpp>
namespace bp = boost::parser;
// Rule with a std::string_view attribute, defined below as a lookup in
// rule_symbols.
// NOTE(review): symrule is not referenced by main() in this file.
bp::rule<class symbol_rule, std::string_view> const symrule = "symbols";
bp::symbols<std::string_view> rule_symbols;
// Semantic action that assigns the sub-parser's attribute to the rule's value.
auto const fwd_attr = [](auto & ctx) { _val(ctx) = _attr(ctx); };
auto symrule_def = rule_symbols[fwd_attr];
BOOST_PARSER_DEFINE_RULES(symrule);
// One token spec per Roman-numeral letter used by the tests (token IDs 0-4),
// plus arabic_num, which matches a run of digits and carries an int attribute.
constexpr auto I = bp::token_spec<"I", 0>;
constexpr auto V = bp::token_spec<"V", 1>;
constexpr auto X = bp::token_spec<"X", 2>;
constexpr auto L = bp::token_spec<"L", 3>;
constexpr auto C = bp::token_spec<"C", 4>;
constexpr auto arabic_num = bp::token_spec<"\\d+", 5, int>;
// Lexer combining all of the token specs above.
constexpr auto lexer = bp::lexer<char, int> | I | V | X | L | C | arabic_num;
int main()
{
// symbols_empty
{
bp::symbols<int> roman_numerals;
bp::symbols<std::string> named_strings;
auto r = "I" | bp::to_tokens(lexer);
BOOST_TEST(!bp::parse(r, roman_numerals));
BOOST_TEST(!bp::parse(r, named_strings));
}
// symbols_simple
{
bp::symbols<int> const roman_numerals = {
{"I", 1}, {"V", 5}, {"X", 10}, {"L", 50}, {"C", 100}};
bp::symbols<std::string> const named_strings = {
{"I", "1"}, {"V", "5"}, {"X", "10"}, {"L", "50"}, {"C", "100"}};
{
auto const result =
bp::parse("I" | bp::to_tokens(lexer), roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 1);
}
{
auto const result =
bp::parse("I" | bp::to_tokens(lexer), named_strings);
BOOST_TEST(result);
BOOST_TEST(*result == "1");
}
{
auto const result =
bp::parse("L" | bp::to_tokens(lexer), roman_numerals);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
}
{
auto const result =
bp::parse("L" | bp::to_tokens(lexer), named_strings);
BOOST_TEST(result);
BOOST_TEST(*result == "50");
}
}
// symbols_mutating
{
bp::symbols<int> roman_numerals;
roman_numerals.insert_for_next_parse("I", 1);
roman_numerals.insert_for_next_parse("V", 5);
roman_numerals.insert_for_next_parse("X", 10);
auto const add_numeral = [&roman_numerals](auto & context) {
using namespace boost::parser::literals;
const std::string_view sv = bp::get(_attr(context), 0_c);
roman_numerals.insert(context, sv, bp::get(_attr(context), 1_c));
};
auto const numerals_parser =
((I | V | X | L | C) >> arabic_num)[add_numeral] >> roman_numerals;
{
auto const result =
bp::parse("L50L" | bp::to_tokens(lexer), numerals_parser);
BOOST_TEST(result);
BOOST_TEST(*result == 50);
BOOST_TEST(!bp::parse("L", roman_numerals));
}
{
auto const result =
bp::parse("C100C" | bp::to_tokens(lexer), numerals_parser);
BOOST_TEST(result);
BOOST_TEST(*result == 100);
BOOST_TEST(!bp::parse("C", roman_numerals));
}
{
auto const result =
bp::parse("L50C" | bp::to_tokens(lexer), numerals_parser);
BOOST_TEST(!result);
}
}
return boost::report_errors();
}

File diff suppressed because it is too large Load Diff

View File

@@ -39,7 +39,7 @@ constexpr auto double_s = u8"sS"; // U+0073 U+0073
// basic)
{
constexpr auto char_p = no_case[char_('a') | char_('B')];
auto char_p = no_case[char_('a') | char_('B')];
{
auto const result = parse("a", char_p);
@@ -461,12 +461,12 @@ constexpr auto double_s = u8"sS"; // U+0073 U+0073
{
constexpr auto mixed_sharp_s1 = U"ẞs";
constexpr auto mixed_sharp_s2 = U"sẞ";
auto const result = detail::no_case_aware_string_mismatch(
mixed_sharp_s1,
detail::text::null_sentinel,
mixed_sharp_s2,
detail::text::null_sentinel,
true);
auto const result =
detail::no_case_aware_string_mismatch<ignore_case_t::yes>(
mixed_sharp_s1,
detail::text::null_sentinel,
mixed_sharp_s2,
detail::text::null_sentinel);
BOOST_TEST(result.first == detail::text::null_sentinel);
BOOST_TEST(result.second == detail::text::null_sentinel);
}

File diff suppressed because it is too large Load Diff

View File

@@ -6,10 +6,6 @@
* http://www.boost.org/LICENSE_1_0.txt)
*/
#include <boost/parser/config.hpp>
#if BOOST_PARSER_USE_CONCEPTS
#include <boost/parser/lexer.hpp>
#endif
#include <boost/parser/parser.hpp>
@@ -30,17 +26,6 @@ struct globals_t
globals_t const globals;
enum class unprintable_tokens { foo, bar };
enum class printable_tokens { foo, bar };
std::ostream & operator<<(std::ostream & os, printable_tokens tok)
{
switch (tok) {
case printable_tokens::foo: os << "foo"; break;
case printable_tokens::bar: os << "bar"; break;
}
return os;
}
auto i = [](auto & ctx) { return _globals(ctx).i; };
auto i2 = [](auto & ctx) { return _globals(ctx).i2; };
auto u = [](auto & ctx) { return _globals(ctx).u; };
@@ -159,7 +144,7 @@ int main()
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| transform)f_[] |\n"
<< "| transform(f)[] |\n"
<< "----------------------------------------\n";
auto f = [](auto x) { return x; };
@@ -171,6 +156,8 @@ int main()
<< "----------------------------------------\n";
PARSE(omit[char_]);
PARSE(omit[omit[char_]]);
PARSE(omit[*omit[char_]]);
std::cout << "\n\n"
<< "----------------------------------------\n"
@@ -203,6 +190,15 @@ int main()
PARSE(skip[char_]);
PARSE(skip(ws)[char_]);
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| no_case[] |\n"
<< "----------------------------------------\n";
PARSE(no_case[char_]);
PARSE(no_case[no_case[char_]]);
PARSE(no_case[*no_case[char_]]);
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| merge[] |\n"
@@ -233,6 +229,16 @@ int main()
PARSE(!char_);
PARSE(!(*char_ >> char_));
PARSE(!char_ >> char_);
PARSE(*char_ >> !char_);
PARSE(!char_ >> *char_ >> char_ >> !char_);
try {
PARSE((!char_) > char_);
} catch (...) {
}
PARSE(*char_ > !char_);
PARSE((!char_) > *char_ >> char_ > !char_);
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| operator& |\n"
@@ -241,6 +247,16 @@ int main()
PARSE(&char_);
PARSE(&(*char_ >> char_));
PARSE(&char_ >> char_);
PARSE(*char_ >> &char_);
PARSE(&char_ >> *char_ >> char_ >> &char_);
try {
PARSE(&char_ > char_);
} catch (...) {
}
PARSE(*char_ > &char_);
PARSE(&char_ >>*char_ >> char_ > &char_);
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| symbols<T> |\n"
@@ -499,64 +515,4 @@ int main()
PARSE_CHAR32(float_);
PARSE_CHAR32(double_);
#if BOOST_PARSER_USE_CONCEPTS
{
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| unprintable_foo (token_spec) |\n"
<< "----------------------------------------\n";
constexpr auto unprintable_foo =
token_spec<"\\w\\w\\w", unprintable_tokens::foo>;
constexpr auto unprintable_lexer =
lexer<char, unprintable_tokens> | unprintable_foo;
std::cout << "token_spec<\"\\w\\w\\w\", unprintable_tokens::foo>:\n";
parse(str | to_tokens(unprintable_lexer), unprintable_foo, trace::on);
std::cout
<< "token_spec<\"\\w\\w\\w\", unprintable_tokens::foo>(\"foo\"):\n";
parse(
str | to_tokens(unprintable_lexer),
unprintable_foo("foo"),
trace::on);
}
{
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| printable_foo (token_spec) |\n"
<< "----------------------------------------\n";
constexpr auto printable_foo =
token_spec<"\\w\\w\\w", printable_tokens::foo>;
constexpr auto printable_lexer =
lexer<char, printable_tokens> | printable_foo;
std::cout << "token_spec<\"\\w\\w\\w\", printable_tokens::foo>:\n";
parse(str | to_tokens(printable_lexer), printable_foo, trace::on);
std::cout
<< "token_spec<\"\\w\\w\\w\", printable_tokens::foo>(\"bar\"):\n";
parse(
str | to_tokens(printable_lexer), printable_foo("bar"), trace::on);
}
{
std::cout << "\n\n"
<< "----------------------------------------\n"
<< "| int_foo (token_spec) |\n"
<< "----------------------------------------\n";
constexpr auto int_foo = token_spec<"\\w\\w\\w", 42, int>;
constexpr auto int_lexer = lexer<char, int> | int_foo;
std::cout << "token_spec<\"\\w\\w\\w\", 42, int>:\n";
parse(str | to_tokens(int_lexer), int_foo, trace::on);
std::cout << "token_spec<\"\\w\\w\\w\", 42, int>(13):\n";
parse(str | to_tokens(int_lexer), int_foo(13), trace::on);
}
#endif
}