mirror of
https://github.com/boostorg/parser.git
synced 2026-01-19 16:32:13 +00:00
Compare commits
88 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c6d35e8791 | ||
|
|
ff1059695d | ||
|
|
9a958224e4 | ||
|
|
0463ecb4f6 | ||
|
|
5c036a778a | ||
|
|
c3667f5265 | ||
|
|
e2f015991c | ||
|
|
2b48b8656e | ||
|
|
df966c78f6 | ||
|
|
9733445118 | ||
|
|
b1c5c4b487 | ||
|
|
96824b2013 | ||
|
|
19952581f0 | ||
|
|
06816abc62 | ||
|
|
8f8791244a | ||
|
|
05a110c54d | ||
|
|
cafd04c391 | ||
|
|
3fd285c014 | ||
|
|
bc6e9e3447 | ||
|
|
eab7e82988 | ||
|
|
655870000b | ||
|
|
039453079e | ||
|
|
a908e950d5 | ||
|
|
87e00a173d | ||
|
|
3176c6f823 | ||
|
|
92c4993b87 | ||
|
|
9d67b0df7f | ||
|
|
d4f4589ead | ||
|
|
178d62a250 | ||
|
|
e654c9fda7 | ||
|
|
9dbf30241d | ||
|
|
f298bfe59b | ||
|
|
07cd667a91 | ||
|
|
00510ed962 | ||
|
|
1a5b7467ca | ||
|
|
13a52e10f9 | ||
|
|
67c3ec180c | ||
|
|
8658f8cd6b | ||
|
|
f69d7acdd9 | ||
|
|
542bdb0e0e | ||
|
|
f3e326e344 | ||
|
|
027d861b08 | ||
|
|
6ab8f96e19 | ||
|
|
2b518bc74d | ||
|
|
1421592876 | ||
|
|
092a76173b | ||
|
|
8830dfed02 | ||
|
|
935322649b | ||
|
|
7566dbdde1 | ||
|
|
298bae0058 | ||
|
|
b6c1229c54 | ||
|
|
793e519eb8 | ||
|
|
416607c954 | ||
|
|
e419ef2a60 | ||
|
|
1a405f8133 | ||
|
|
fbc21ef2fd | ||
|
|
f00f4dfa75 | ||
|
|
358adf247a | ||
|
|
ee8ab13779 | ||
|
|
391bb2b5b0 | ||
|
|
ad64fb6973 | ||
|
|
6ed1152390 | ||
|
|
51cc855dd7 | ||
|
|
bf336fb096 | ||
|
|
63483cb284 | ||
|
|
8b24206aee | ||
|
|
49213c428e | ||
|
|
3df6626e58 | ||
|
|
79f34ef252 | ||
|
|
f465a75069 | ||
|
|
6159a481db | ||
|
|
cd9c7492dd | ||
|
|
9a1fadbe82 | ||
|
|
e5c101378b | ||
|
|
f468d529fe | ||
|
|
b5d4339f2c | ||
|
|
6d7fa6f105 | ||
|
|
c975f57908 | ||
|
|
063291b78c | ||
|
|
3eb827dcd6 | ||
|
|
6d796287b6 | ||
|
|
bb0fb885b8 | ||
|
|
94a9daec40 | ||
|
|
4344dd3f47 | ||
|
|
a7c7470bc1 | ||
|
|
b273133fd2 | ||
|
|
3a7ddcf936 | ||
|
|
d79efb0daa |
@@ -22,12 +22,6 @@ target_link_libraries(boost_parser
|
||||
Boost::type_index
|
||||
)
|
||||
|
||||
if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt")
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
endif()
|
||||
|
||||
else()
|
||||
|
||||
cmake_minimum_required(VERSION 3.14...3.20)
|
||||
|
||||
11
README.md
11
README.md
@@ -34,12 +34,7 @@ int main()
|
||||
}
|
||||
```
|
||||
|
||||
This library is header-only, and has a default dependency on Boost.Hana. The
|
||||
Boost.Hana dependency can be eliminated, and `std::tuple` will be used instead
|
||||
of `boost::hana::tuple` throughout the library, if you `#define`
|
||||
`BOOST_PARSER_DISABLE_HANA_TUPLE`. To try out the lib without messing with
|
||||
dependencies, add its `include/` dir as an include path in your build and
|
||||
define `BOOST_PARSER_DISABLE_HANA_TUPLE` in your build.
|
||||
This library is header-only, and has no Boost dependencies by default.
|
||||
|
||||
Features:
|
||||
|
||||
@@ -52,9 +47,7 @@ Features:
|
||||
- Trace support for debugging your parsers.
|
||||
- Clever hacks to make compile time errors easier to deal with. (These are totally optional.)
|
||||
|
||||
This library targets submission to Boost.
|
||||
|
||||
Online docs: https://tzlaine.github.io/parser
|
||||
This library first appeared in Boost 1.87.0
|
||||
|
||||
Master status:
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
[import ../test/parser.cpp]
|
||||
[import ../test/parser_rule.cpp]
|
||||
[import ../test/parser_quoted_string.cpp]
|
||||
[import ../test/lexer_and_parser.cpp]
|
||||
|
||||
[import ../include/boost/parser/concepts.hpp]
|
||||
[import ../include/boost/parser/error_handling_fwd.hpp]
|
||||
@@ -109,6 +110,16 @@
|
||||
[def _trans_replace_vs_ [classref boost::parser::transform_replace_view `boost::parser::transform_replace_view`s]]
|
||||
|
||||
|
||||
[def _lex_ [classref boost::parser::lexer_t `boost::parser::lexer_t`]]
|
||||
[def _tok_ [classref boost::parser::token `boost::parser::token`]]
|
||||
[def _toks_ [classref boost::parser::token `boost::parser::token`s]]
|
||||
[def _tok_spec_ [classref boost::parser::token_spec_t `boost::parser::token_spec_t`]]
|
||||
[def _tok_specs_ [classref boost::parser::token_spec_t `boost::parser::token_spec_t`s]]
|
||||
[def _tok_chs_ [globalref boost::parser::token_chars `boost::parser::token_chars`]]
|
||||
[def _to_tok_ [globalref boost::parser::to_tokens `boost::parser::to_tokens`]]
|
||||
[def _tok_v_ [classref boost::parser::tokens_view `boost::parser::tokens_view`]]
|
||||
[def _ch_id_ [globalref boost::parser::character_id `boost::parser::character_id`]]
|
||||
|
||||
[def _std_str_ `std::string`]
|
||||
[def _std_vec_char_ `std::vector<char>`]
|
||||
[def _std_vec_char32_ `std::vector<char32_t>`]
|
||||
@@ -253,6 +264,12 @@
|
||||
[def _udls_ [@https://en.cppreference.com/w/cpp/language/user_literal UDLs]]
|
||||
[def _yaml_ [@https://yaml.org/spec/1.2/spec.html YAML 1.2]]
|
||||
|
||||
[def _nttp_ [@https://en.cppreference.com/w/cpp/language/template_parameters NTTP]]
|
||||
[def _nttps_ [@https://en.cppreference.com/w/cpp/language/template_parameters NTTPs]]
|
||||
|
||||
[def _ctre_ [@https://github.com/hanickadot/compile-time-regular-expressions CTRE]]
|
||||
[def _pcre_ [@https://www.pcre.org PCRE]]
|
||||
|
||||
[def _Spirit_ [@https://www.boost.org/doc/libs/release/libs/spirit Boost.Spirit]]
|
||||
[def _spirit_reals_ [@https://www.boost.org/doc/libs/release/libs/spirit/doc/html/spirit/qi/reference/numeric/real.html real number parsers]]
|
||||
|
||||
|
||||
221
doc/tables.qbk
221
doc/tables.qbk
@@ -34,9 +34,9 @@ itself be used as a parser; it must be called. In the table below:
|
||||
|
||||
* `a` is a semantic action;
|
||||
|
||||
* `r` is an object whose type models `parsable_range`; and
|
||||
* `r` is an object whose type models `parsable_range`;
|
||||
|
||||
* `p`, `p1`, `p2`, ... are parsers.
|
||||
* `p`, `p1`, `p2`, ... are parsers; and
|
||||
|
||||
* `escapes` is a _symbols_t_ object, where `T` is `char` or `char32_t`.
|
||||
|
||||
@@ -595,3 +595,220 @@ same attribute generation rules.
|
||||
[[`p1 | p2[a] | p3`] [`std::optional<std::variant<_ATTR_np_(p1), _ATTR_np_(p3)>>`]]
|
||||
]
|
||||
]
|
||||
|
||||
[template table_token_parsers_and_their_semantics
|
||||
This table lists all the _Parser_ parsers usable during token parsing. For
|
||||
the callable parsers, a separate entry exists for each possible arity of
|
||||
arguments. For a parser `p`, if there is no entry for `p` without arguments,
|
||||
`p` is a function, and cannot itself be used as a parser; it must be called.
|
||||
In the table below:
|
||||
|
||||
* each entry is a global object usable directly in your parsers, unless
|
||||
otherwise noted;
|
||||
|
||||
* "code point" is used to refer to the elements of the input range, which
|
||||
assumes that the parse is being done in the Unicode-aware code path (if the
|
||||
parse is being done in the non-Unicode code path, read "code point" as
|
||||
"`char`");
|
||||
|
||||
* _RES_ is a notional macro that expands to the resolution of parse argument
|
||||
or evaluation of a parse predicate (see _parsers_uses_);
|
||||
|
||||
* "`_RES_np_(pred) == true`" is a shorthand notation for "`_RES_np_(pred)` is
|
||||
contextually convertible to `bool` and `true`"; likewise for `false`;
|
||||
|
||||
* `c` is a character of some character type;
|
||||
|
||||
* `str` is a string literal of type `CharType const[]`, for some character
|
||||
type `CharType`;
|
||||
|
||||
* `pred` is a parse predicate;
|
||||
|
||||
* `arg0`, `arg1`, `arg2`, ... are parse arguments;
|
||||
|
||||
* `a` is a semantic action;
|
||||
|
||||
* `r` is an object whose type models `parsable_range`;
|
||||
|
||||
* `tok` is a token parser created using _tok_spec_; and
|
||||
|
||||
* `p`, `p1`, `p2`, ... are parsers.
|
||||
|
||||
[note The definition of `parsable_range` is:
|
||||
|
||||
[parsable_range_concept]
|
||||
|
||||
]
|
||||
|
||||
[note Some of the parsers in this table consume no input. All parsers consume
|
||||
the input they match unless otherwise stated in the table below.]
|
||||
|
||||
[table Token Parsers and Their Semantics
|
||||
[[Parser] [Semantics] [Attribute Type] [Notes]]
|
||||
|
||||
[[ `tok` ]
|
||||
[ Matches any token with the same ID as `tok`. ]
|
||||
[ The attribute type given when specifying `tok`, or a string view if unspecified. The attribute type must be a specialization of `std::basic_string_view`, an integral type, or a floating point type. ]
|
||||
[]]
|
||||
|
||||
[[ `tok(arg0)` ]
|
||||
[ Matches exactly the value `_RES_np_(arg0)`. ]
|
||||
[ The attribute type given when specifying `tok`. The attribute type must be an integral type or a floating point type. ]
|
||||
[ This case applies only when `arg0` is *not* a range. ]]
|
||||
|
||||
[[ `tok(r)` ]
|
||||
[ Matches exactly the value `r`. ]
|
||||
[ The attribute type given when specifying `tok`. The attribute type must be a specialization of `std::basic_string_view`. ]
|
||||
[ This overload does *not* take parse arguments. ]]
|
||||
|
||||
[[ _e_ ]
|
||||
[ Matches /epsilon/, the empty string. Always matches, and consumes no input. ]
|
||||
[ None. ]
|
||||
[ Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `*_e_`, `+_e_`, etc (this applies to unconditional _e_ only). ]]
|
||||
|
||||
[[ `_e_(pred)` ]
|
||||
[ Fails to match the input if `_RES_np_(pred) == false`. Otherwise, the semantics are those of _e_. ]
|
||||
[ None. ]
|
||||
[]]
|
||||
|
||||
[[ _ws_ ]
|
||||
[ Matches a single whitespace code point (see note), according to the Unicode White_Space property. ]
|
||||
[ None. ]
|
||||
[ For more info, see the [@https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt Unicode properties]. _ws_ may consume one code point or two. It only consumes two code points when it matches `"\r\n"`. ]]
|
||||
|
||||
[[ _eol_ ]
|
||||
[ Matches a single newline (see note), following the "hard" line breaks in the Unicode line breaking algorithm. ]
|
||||
[ None. ]
|
||||
[ For more info, see the [@https://unicode.org/reports/tr14 Unicode Line Breaking Algorithm]. _eol_ may consume one code point or two. It only consumes two code points when it matches `"\r\n"`. ]]
|
||||
|
||||
[[ _eoi_ ]
|
||||
[ Matches only at the end of input, and consumes no input. ]
|
||||
[ None. ]
|
||||
[]]
|
||||
|
||||
[[ _attr_np_`(arg0)` ]
|
||||
[ Always matches, and consumes no input. Generates the attribute `_RES_np_(arg0)`. ]
|
||||
[ `decltype(_RES_np_(arg0))`. ]
|
||||
[ An important use case for `_attr_` is to provide a default attribute value as a trailing alternative. For instance, an *optional* comma-delimited list is: `int_ % ',' | attr(std::vector<int>)`. Without the "`| attr(...)`", at least one `int_` match would be required. ]]
|
||||
|
||||
[[ _ch_ ]
|
||||
[ Matches any single code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_ch_(arg0)` ]
|
||||
[ Matches exactly the code point `_RES_np_(arg0)`. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_ch_(arg0, arg1)` ]
|
||||
[ Matches the next code point `n` in the input, if `_RES_np_(arg0) <= n && n <= _RES_np_(arg1)`. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_ch_(r)` ]
|
||||
[ Matches the next code point `n` in the input, if `n` is one of the code points in `r`. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See _attr_gen_. ]
|
||||
[ `r` is taken to be in a UTF encoding. The exact UTF used depends on `r`'s element type. If you do not pass UTF encoded ranges for `r`, the behavior of _ch_ is undefined. Note that ASCII is a subset of UTF-8, so ASCII is fine. EBCDIC is not. `r` is not copied; a reference to it is taken. The lifetime of `_ch_(r)` must be within the lifetime of `r`. This overload of _ch_ does *not* take parse arguments. Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ _cp_ ]
|
||||
[ Matches a single code point. ]
|
||||
[ `char32_t` ]
|
||||
[ Similar to _ch_, but with a fixed `char32_t` attribute type; _cp_ has all the same call operator overloads as _ch_, though they are not repeated here, for brevity. Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ _cu_ ]
|
||||
[ Matches a single code point. ]
|
||||
[ `char` ]
|
||||
[ Similar to _ch_, but with a fixed `char` attribute type; _cu_ has all the same call operator overloads as _ch_, though they are not repeated here, for brevity. Even though the name "`cu`" suggests that this parser match at the code unit level, it does not. The name refers to the attribute type generated, much like the names _i_ versus _ui_. Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_blank_` ]
|
||||
[ Equivalent to `_ws_ - _eol_`. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_control_` ]
|
||||
[ Matches a single control-character code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_digit_` ]
|
||||
[ Matches a single decimal digit code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_punct_` ]
|
||||
[ Matches a single punctuation code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_hex_digit_` ]
|
||||
[ Matches a single hexadecimal digit code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_lower_` ]
|
||||
[ Matches a single lower-case code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `_upper_` ]
|
||||
[ Matches a single upper-case code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[ Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ _lit_np_`(c)`]
|
||||
[ Matches exactly the given code point `c`. ]
|
||||
[ None. ]
|
||||
[ _lit_ does *not* take parse arguments. Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ `c_l` ]
|
||||
[ Matches exactly the given code point `c`. ]
|
||||
[ None. ]
|
||||
[ This is a _udl_ that represents `_lit_np_(c)`, for example `'F'_l`. Only matches tokens with the ID _ch_id_. ]]
|
||||
|
||||
[[ _lit_np_`(r)`]
|
||||
[ Matches exactly the given string `r`. ]
|
||||
[ None. ]
|
||||
[ _lit_ does *not* take parse arguments. _lit_ matches the entire token or not at all. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
|
||||
|
||||
[[ `str_l` ]
|
||||
[ Matches exactly the given string `str`. ]
|
||||
[ None. ]
|
||||
[ This is a _udl_ that represents `_lit_np_(s)`, for example `"a string"_l`. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
|
||||
|
||||
[[ `_str_np_(r)`]
|
||||
[ Matches exactly `r`, and generates the match as an attribute. ]
|
||||
[ _std_str_ ]
|
||||
[ _str_ does *not* take parse arguments. _str_ matches the entire token or not at all. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
|
||||
|
||||
[[ `str_p`]
|
||||
[ Matches exactly `str`, and generates the match as an attribute. ]
|
||||
[ _std_str_ ]
|
||||
[ This is a _udl_ that represents `_str_np_(s)`, for example `"a string"_p`. Only matches tokens with an attribute type that is a specialization of `std::basic_string_view`. ]]
|
||||
|
||||
[[ `_rpt_np_(arg0)[p]` ]
|
||||
[ Matches iff `p` matches exactly `_RES_np_(arg0)` times. ]
|
||||
[ `std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p)>` ]
|
||||
[ The special value _inf_ may be used; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(_inf_)[_e_]` (this applies to unconditional _e_ only). ]]
|
||||
|
||||
[[ `_rpt_np_(arg0, arg1)[p]` ]
|
||||
[ Matches iff `p` matches between `_RES_np_(arg0)` and `_RES_np_(arg1)` times, inclusively. ]
|
||||
[ `std::string` if `_ATTR_np_(p)` is `char` or `char32_t`, otherwise `std::vector<_ATTR_np_(p)>` ]
|
||||
[ The special value _inf_ may be used for the upper bound; it indicates unlimited repetition. `decltype(_RES_np_(arg0))` and `decltype(_RES_np_(arg1))` each must be implicitly convertible to `int64_t`. Matching _e_ an unlimited number of times creates an infinite loop, which is undefined behavior in C++. _Parser_ will assert in debug mode when it encounters `_rpt_np_(n, _inf_)[_e_]` (this applies to unconditional _e_ only). ]]
|
||||
|
||||
[[ `_if_np_(pred)[p]` ]
|
||||
[ Equivalent to `_e_(pred) >> p`. ]
|
||||
[ `std::optional<_ATTR_np_(p)>` ]
|
||||
[ It is an error to write `_if_np_(pred)`. That is, it is an error to omit the conditionally matched parser `p`. ]]
|
||||
|
||||
[[ `_sw_np_(arg0)(arg1, p1)(arg2, p2) ...` ]
|
||||
[ Equivalent to `p1` when `_RES_np_(arg0) == _RES_np_(arg1)`, `p2` when `_RES_np_(arg0) == _RES_np_(arg2)`, etc. If there is no such `argN`, the behavior of _sw_ is undefined. ]
|
||||
[ `std::variant<_ATTR_np_(p1), _ATTR_np_(p2), ...>` ]
|
||||
[ It is an error to write `_sw_np_(arg0)`. That is, it is an error to omit the conditionally matched parsers `p1`, `p2`, .... ]]
|
||||
|
||||
[[ _symbols_t_ ]
|
||||
[ _symbols_ is an associative container of key, value pairs. Each key is a _std_str_ and each value has type `T`. In the Unicode parsing path, the strings are considered to be UTF-8 encoded; in the non-Unicode path, no encoding is assumed. _symbols_ matches the longest prefix `pre` of the input that is equal to one of the keys `k`. If the length `len` of `pre` is zero, and there is no zero-length key, it does not match the input. If `len` is positive, the generated attribute is the value associated with `k`.]
|
||||
[ `T` ]
|
||||
[ Unlike the other entries in this table, _symbols_ is a type, not an object. ]]
|
||||
]
|
||||
]
|
||||
|
||||
473
doc/tutorial.qbk
473
doc/tutorial.qbk
@@ -75,6 +75,10 @@ matches the input. _ATTR_ is a notional macro that expands to the attribute
|
||||
type of the parser passed to it; `_ATTR_np_(_d_)` is `double`. This is
|
||||
similar to the _attr_ type trait.
|
||||
|
||||
/Token parsing/ is parsing using _Parser_'s optional support for
|
||||
lexing/tokenizing first, and parsing the resulting tokens, as opposed to the
|
||||
normal operation of _Parser_, in which input characters are parsed.
|
||||
|
||||
Next, we'll look at some simple programs that parse using _Parser_. We'll
|
||||
start small and build up from there.
|
||||
|
||||
@@ -351,7 +355,7 @@ so this directive is only available in C++20 and later.
|
||||
|
||||
namespace bp = boost::parser;
|
||||
auto int_parser = bp::int_ % ','; // ATTR(int_parser) is std::vector<int>
|
||||
auto sv_parser = bp::string_view[int_parser]; // ATTR(subrange_parser) is a string_view
|
||||
auto sv_parser = bp::string_view[int_parser]; // ATTR(sv_parser) is a string_view
|
||||
|
||||
auto const str = std::string("1, 2, 3, 4, a, b, c");
|
||||
auto first = str.begin();
|
||||
@@ -1126,24 +1130,12 @@ the second `'X'` is recognized by the symbol table parser. However:
|
||||
|
||||
If we parse again, we find that `"X"` did not stay in the symbol table. The
|
||||
fact that `symbols` was declared const might have given you a hint that this
|
||||
would happen. Also, notice that the call to `insert()` in the semantic action
|
||||
uses the parse context; that's where all the symbol table changes are stored
|
||||
during the parse.
|
||||
would happen.
|
||||
|
||||
The full program:
|
||||
|
||||
[self_filling_symbol_table_example]
|
||||
|
||||
[tip _symbols_ also has a call operator that does exactly what
|
||||
`.insert_for_next_parse()` does. This allows you to chain additions with a
|
||||
convenient syntax, like this:
|
||||
|
||||
```
|
||||
symbols<int> roman_numerals;
|
||||
roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);
|
||||
```
|
||||
]
|
||||
|
||||
[important _symbols_ stores all its strings in UTF-32 internally. If you do
|
||||
Unicode or ASCII parsing, this will not matter to you at all. If you do
|
||||
non-Unicode parsing of a character encoding that is not a subset of Unicode
|
||||
@@ -1163,6 +1155,11 @@ erase and clear for the current parse, and another that applies only to
|
||||
subsequent parses. The full set of operations can be found in the _symbols_
|
||||
API docs.
|
||||
|
||||
[note There are two versions of each of the _symbols_ `*_for_next_parse()`
|
||||
functions _emdash_ one that takes a context, and one that does not. The one
|
||||
with the context is meant to be used within a semantic action. The one
|
||||
without the context is for use outside of any parse.]
|
||||
|
||||
[endsect]
|
||||
|
||||
[section The Parsers And Their Uses]
|
||||
@@ -1170,7 +1167,7 @@ API docs.
|
||||
_Parser_ comes with all the parsers most parsing tasks will ever need. Each
|
||||
one is a `constexpr` object, or a `constexpr` function. Some of the
|
||||
non-functions are also callable, such as _ch_, which may be used directly, or
|
||||
with arguments, as in _ch_`('a', 'z')`. Any parser that can be called,
|
||||
with arguments, as in `_ch_('a', 'z')`. Any parser that can be called,
|
||||
whether a function or callable object, will be called a /callable parser/ from
|
||||
now on. Note that there are no nullary callable parsers; they each take one
|
||||
or more arguments.
|
||||
@@ -3668,6 +3665,452 @@ Some things to be aware of when looking at _Parser_ trace output:
|
||||
|
||||
[endsect]
|
||||
|
||||
[section Token parsing / Using a Lexer]
|
||||
|
||||
_Parser_ has optional support for lexing before parsing. The optional support
|
||||
is based on an external dependency, _ctre_. _ctre_ produces a sequence of
|
||||
tokens by matching a set of regexes that you provide. Each regex is used to
|
||||
match against the input to produce one token with an ID associated with that
|
||||
regex. When you call _p_, you pass it a lazy range of tokens that adapts the
|
||||
input, and _p_ parses the tokens, not the underlying characters. When you
|
||||
backtrack, you just move back to an earlier token, not an earlier place in the
|
||||
underlying sequence of characters.
|
||||
|
||||
[heading A basic example]
|
||||
|
||||
Let's look at an example of how to do token parsing. First, you must include
|
||||
the lexer header before the parser header.
|
||||
|
||||
[tokens_basics_headers]
|
||||
|
||||
The inclusion of this optional header is what enables token parsing.
|
||||
Character parsing ("normal" parsing) is unaffected by this header inclusion
|
||||
_emdash_ you can always do character parsing.
|
||||
|
||||
[important _ctre_ is a header-only library, and it can be included as a single
|
||||
header. It requires C++20 or later, so _Parser_'s support for token parsing does
|
||||
as well. _Parser_ uses the single-header version with Unicode support,
|
||||
`ctre-unicode.hpp`.]
|
||||
|
||||
Then, you define a lexer and its tokens.
|
||||
|
||||
[tokens_basics_lexer]
|
||||
|
||||
Here, we first see three _tok_specs_. Each one consists of an _nttp_ regex
|
||||
string literal and an _nttp_ token ID; the first one matches `"foo"`, and has
|
||||
an ID of `0`, etc. _lex_ takes two template parameters. The first parameter
|
||||
indicates that the value type of the parsed input sequence is `char`. The
|
||||
second one indicates that the ID-type of all subsequent _tok_specs_ will be
|
||||
`int`. We create a full lexer by starting with the `lexer<...>` expression,
|
||||
followed by a piped-together sequence of _tok_specs_.
|
||||
|
||||
The final lexer `lexer` has a combined regex string, `"(foo)|(b.*r)|(b.+z)"`.
|
||||
This string is built up at compile time, and is represented by an _nttp_. It
|
||||
is the single regex given to _ctre_, which _ctre_ uses to produce a sequence
|
||||
of matches from it.
|
||||
|
||||
`lexer` and `token_spec` are variable templates; they make variables from the
|
||||
templates _lex_ and _tok_spec_, respectively. They are provided as a
|
||||
notational convenience, just so you don't have to put `{}` after every lexer
|
||||
and token spec you write. _lex_ and _tok_spec_ are empty classes. Their
|
||||
configury is stored in _nttps_.
|
||||
|
||||
Next, you create a range of _toks_ from your input. This range of tokens is
|
||||
what _p_ will parse.
|
||||
|
||||
[tokens_basics_input_range]
|
||||
|
||||
The input must model `std::ranges::contiguous_range`. This is due to the way
|
||||
_ctre_ works; it produces a sequence of matches that are convertible to
|
||||
`std::basic_string_view<CharType>`. In our case, since we are lexing a
|
||||
sequence of `char`, _ctre_ will produce a sequence of `std::string_view`
|
||||
matches. Note that the value type/character type we specified for _lex_ above
|
||||
must match the input sequence's value type/character type, or the program is
|
||||
ill-formed. Also note that because we are lexing a contiguous range of
|
||||
characters, you cannot use any of the `boost::parser::as_utf*` range adaptors
|
||||
when doing token parsing.
|
||||
|
||||
Next, you define a parser.
|
||||
|
||||
[tokens_basics_parser]
|
||||
|
||||
This has the same semantics as the character parsers you've seen in the rest
|
||||
of the documentation. Each _tok_spec_ has the same interface as a parser, so
|
||||
it can be used with all the parser combining operations, like `operator>>`.
|
||||
However, unlike when doing character parsing, when token parsing all the
|
||||
terminal parsers are restricted to a subset of the terminal parsers that are
|
||||
available in character parsing (see the full list in the table below). This
|
||||
is because most of the parsers in _Parser_ parse sequences of characters. For
|
||||
example, if you used `_i_(42)` above instead of `foo`, the _i_ parser would
|
||||
try to match two consecutive values from the input sequence, and would expect
|
||||
them to equal `'4'` and `'2'`, respectively. It would instead see two tokens,
|
||||
and the comparisons would not even compile.
|
||||
|
||||
Finally, you can put everything together in a call to _p_.
|
||||
|
||||
[tokens_basics_parse]
|
||||
|
||||
As you can see, the parse succeeded, and we got three attributes out of it.
|
||||
Each attribute has the type `std::string_view`.
|
||||
|
||||
[heading Capture groups]
|
||||
|
||||
Capture groups are valid regex syntax, but you cannot use them in your
|
||||
_tok_spec_ regexes. For instance, `bp::token_spec<"(foo)+", 0>` (to match one
|
||||
or more consecutive `"foo"`s) will compile and run, and you will get garbage
|
||||
results. _Parser_ relies on the exact number and order of capture groups to
|
||||
do its token generation. If you want to group a part of your regex, use a
|
||||
non-capture group, like `"(?:foo)+"`.
|
||||
|
||||
[heading Whitespace in token parsing]
|
||||
|
||||
Using the parser above, what if we tried to parse the token range `"foo baz
|
||||
bar" | bp::to_tokens(lexer)` instead? Turns out, we get the same answer. You
|
||||
cannot use an explicit skipper when parsing tokens. However, parsers are much
|
||||
simpler when you have a notion of a skipper, especially for whitespace. So,
|
||||
_lex_ has one built in; it uses `"\\s+"` by default. Whitespace is matched,
|
||||
but produces no tokens. If you want to change the whitespace/skipper regex,
|
||||
you can provide it when specifying the lexer. For example, here is how you
|
||||
would specify the whitespace/skipped tokens to be any sequence of whitespace
|
||||
characters, or any C++-style trailing comment (`// ...`).
|
||||
|
||||
bp::lexer<char, int, "\\s+|\\/\\/.*$">
|
||||
|
||||
If whitespace information is important in your parse, simply provide `""` or
|
||||
the more readable convenience constant `bp::no_ws` to `lexer<>` as the
|
||||
whitespace regex, and make a regular token that matches whitespace. That way,
|
||||
you'll see all the whitespace in the sequence of tokens that you parse.
|
||||
|
||||
[heading Token attribute types]
|
||||
|
||||
The parser we looked at in the initial simple example produced three
|
||||
`std::string_view`s, one for each token we parsed. However, we may know that
|
||||
a particular token is meant to match numbers. If this is the case, we can let
|
||||
_Parser_ know that we expect the token to be interpretable as a particular
|
||||
type of numeric value. I'm using "numeric" for brevity, but this includes
|
||||
`bool` as well. For example:
|
||||
|
||||
[tokens_attrs]
|
||||
|
||||
The attribute types for these tokens are `bool`, `std::string_view`, and
|
||||
`double`, respectively. `identifier` has attribute type `std::string_view`
|
||||
because that is the default if you do not specify a type.
|
||||
|
||||
A _tok_ is essentially a variant of `std::basic_string_view<CharType>`, `long
|
||||
long`, and `long double`. The latter two types were selected because they can
|
||||
fit any value of an integral or floating-point type, respectively. Even
|
||||
though _tok_ effectively erases the exact type when it is integral or
|
||||
floating-point, the token parser retains the information of what the exact
|
||||
type is. This is why `true_false` above has an attribute type of `bool` and
|
||||
not `long long`.
|
||||
|
||||
_ctre_ produces a sequence of substrings. Each token produced by _Parser_
|
||||
gets its numeric value (if it should have one) by parsing the substring from
|
||||
_ctre_ with _emdash_ you guessed it _emdash_ a _Parser_ parser. The parser
|
||||
for `bool` is just _b_; the one for `int` is _i_, etc. The integral-type
|
||||
parsers all support a radix/base. If you specify an integral value type for
|
||||
one of your tokens, you can also specify a base, like `bp::token_spec<"\\d+",
|
||||
int, 16>` to parse hex-encoded `int`s.
|
||||
|
||||
Part of the advantage of doing lexing before parsing is that you don't have to
|
||||
reparse everything over and over again. If the subsequence `"1.23456789"` is
|
||||
found in the input, you only lex it once. After that, it's already in the
|
||||
right form as a floating-point number; backtracking will not provoke reparsing
|
||||
of those ten characters.
|
||||
|
||||
[heading Single-character tokens]
|
||||
|
||||
Just about any parser above a certain size will have punctuation of some sort
|
||||
_emdash_ elements of the input, usually a single character, that delimit other
|
||||
parts of the input, like commas and braces. To make it easier to specify such
|
||||
tokens, _Parser_ provides _tok_chs_. You can give _tok_chs_ a list of
|
||||
individual characters, and it will create a separate, single-character regex
|
||||
for each one, and add it to your lexer. Each such token will have the special
|
||||
ID _ch_id_.
|
||||
|
||||
Note that the single character you provide must be a `char` in the ASCII range
|
||||
(that is, less than `128`). If you want to use a single character that is
|
||||
outside the ASCII range, just make a normal _tok_spec_ for it. Here is an
|
||||
example using _tok_chs_.
|
||||
|
||||
[tokens_token_char]
|
||||
|
||||
Just like in a character parser, we can use character literals to match the
|
||||
single-character tokens (`'='` and `';'` in the example above). The character
|
||||
literals are turned into _ch_ parsers. _ch_ parsers that you explicitly write
|
||||
may be used as well. They will only match single-character tokens, though
|
||||
(that is, tokens with the ID _ch_id_).
|
||||
|
||||
[heading The differences between parsing characters and parsing tokens]
|
||||
|
||||
Even though _ch_ and _str_ (and lots of other character parsers _emdash_ see
|
||||
the table below) are available when doing token parsing, their semantics are
|
||||
subtly different when used for token parsing.  This is because token parsing
|
||||
involves parsing chunks of input as tokens, rather than individual characters.
|
||||
This may sound obvious, but the implications are not. Consider this example.
|
||||
|
||||
[tokens_string_in_character_vs_token_parsing]
|
||||
|
||||
Why doesn't the token parsing case work? In the character parsing case,
|
||||
_str_np_ tries to match characters from the input, one at a time; it sees
|
||||
`'='` followed by `';'`, so it matches. In the token parsing case, this does
|
||||
not happen. Instead, the input is broken up into two tokens (one for `'='`
|
||||
and one for `';'`). `_str_np_("=;")` tries to match the first token in its
|
||||
entirety, but that token is a character token, not a token with a
|
||||
`std::basic_string_view` attribute. Even if that token did have
|
||||
a `std::basic_string_view` attribute, it would be `"="`, not `"=;"`, and so the
|
||||
match would still fail.
|
||||
|
||||
So, even though string matching is available using _str_, make sure you
|
||||
understand that _str_ is looking for 1) a token with a string view attribute,
|
||||
and 2) a full match of the token's string view against the range provided to
|
||||
_str_.
|
||||
|
||||
_ch_ is also a bit different, since it only matches character tokens that you
|
||||
make with _tok_chs_. Such tokens have the token ID _ch_id_. _ch_ will
|
||||
*never* match any other kind of token. This goes for all the character
|
||||
parsers (_blank_, _punct_, _upper_, etc).
|
||||
|
||||
The character class parsers (e.g. _punct_) are also limited in token parsing
|
||||
vs. their use in character parsing. _tok_chs_ limits characters to the ASCII
|
||||
range for simplicity, and to discourage parsing of sequences of tokens to find
|
||||
things that are detectable using _pcre_ directly. In other words, if you need
|
||||
the full set of punctuation characters, use `"\p{P}"` in one of your token
|
||||
regexes, rather than trying to parse punctuation characters out of the input
|
||||
using _punct_. Because _tok_chs_ limits characters to the ASCII range, all
|
||||
the matching for any character class parser (like _punct_) above the ASCII
|
||||
range will fail.
|
||||
|
||||
[important Though the string and character parsers are available, they're a
|
||||
bit clunky and should be avoided in most cases. Instead, use the character
|
||||
handling from the _pcre_ regex language to make the tokens you want. The best
|
||||
use of string and character parsers in your _Parser_ token parsers is as
|
||||
literals like `"function"`, `'='`, etc.]
|
||||
|
||||
One more important difference between token and character parsing is the
|
||||
effect that using _lexeme_ and/or _skip_ has. If you use _lexeme_ or _skip_,
|
||||
you are changing the sequence of tokens that must be in the token cache.  As
|
||||
such, whenever you *enter* or *leave* a _lexeme_ *or* _skip_ directive, the
|
||||
token cache is flushed. The flushed tokens are everything from the current
|
||||
token position to the end of the cache. If you write `bp::lexeme[p]`
|
||||
frequently enough in your parsers, you could be in for some very uneven
|
||||
performance.
|
||||
|
||||
[important Though you may be used to using _lexeme_ and _skip_ in character
|
||||
parsing, prefer to write explicit token regexes that have equivalent
|
||||
semantics, but operating during lexing rather than during parsing.]
|
||||
|
||||
[heading Parsing tokens with a specific value]
|
||||
|
||||
So far, we've only seen examples of parsing for a particular token. Sometimes
|
||||
we want to match only occurrences of a given token with a particular value,
|
||||
just like when we write something like `_ch_('a', 'z')` in a character parser.
|
||||
|
||||
Just as with _ch_ and most other _Parser_ parsers, you can just add the value
|
||||
to match in parens after the token, like `true_false(true)` or
|
||||
`identifier("exact string")`.
|
||||
|
||||
[heading Token IDs and diagnostics]
|
||||
|
||||
So far, we've only seen `int` used as the token ID type. Any integral type or
|
||||
enum can be used, though. There are limitations on the values you can provide
|
||||
for IDs. First, the values must all be nonnegative; negative values are
|
||||
reserved for use by _Parser_. Second, the values must not exceed `2^23-1`; no
|
||||
one is likely to have very many unique IDs, and token storage can be reduced a
|
||||
bit by using 3 bytes for the ID instead of 4.
|
||||
|
||||
Using an enum has the advantage of making the code a lot clearer. For
|
||||
instance:
|
||||
|
||||
enum class token_names { foo, bar };
|
||||
auto const foo = bp::token_spec<"foo", token_names::foo>;
|
||||
auto const bar = bp::token_spec<"b.r", token_names::bar>;
|
||||
|
||||
... reads a lot better than just using IDs like `0` and `1`.
|
||||
|
||||
There is another important advantage related to diagnostic messages. Consider
|
||||
this parse.
|
||||
|
||||
constexpr auto lexer = bp::lexer<char, token_names> | foo;
|
||||
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
|
||||
|
||||
Here is what the diagnostic looks like.
|
||||
|
||||
[pre
|
||||
1:0: error: Expected tok<0> here:
|
||||
bar
|
||||
^
|
||||
]
|
||||
|
||||
If we added a specific string value we expect, that would be included.
|
||||
|
||||
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo("foo"));
|
||||
|
||||
[pre
|
||||
1:0: error: Expected tok<0>("foo") here:
|
||||
bar
|
||||
^
|
||||
]
|
||||
|
||||
Instead of `"tok<N>"`, it might be nice to give the failed expectation a
|
||||
user-friendly name. In character parsers we usually do this by giving _rs_
|
||||
user-facing diagnostic text. This makes your parse failures much easier to
|
||||
understand and correct. However, many _tok_specs_ may already have a nice
|
||||
name, so why not use it?  If you use enumerators for your token IDs, and make
|
||||
their enumeration streamable, _Parser_ will detect this, and use the streamed
|
||||
enumerator instead of `"tok<N>"`. Here is what we could have written instead.
|
||||
|
||||
enum class printable_tokens { foo, bar };
|
||||
std::ostream & operator<<(std::ostream & os, printable_tokens tok)
|
||||
{
|
||||
switch (tok) {
|
||||
case printable_tokens::foo: os << "foo"; break;
|
||||
case printable_tokens::bar: os << "bar"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
auto const foo = bp::token_spec<"foo", printable_tokens::foo>;
|
||||
auto const bar = bp::token_spec<"b.*r", printable_tokens::bar>;
|
||||
|
||||
constexpr auto lexer = bp::lexer<char, printable_tokens> | foo;
|
||||
bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
|
||||
|
||||
That results in the enumerator being printed instead.
|
||||
|
||||
[pre
|
||||
1:0: error: Expected foo here:
|
||||
bar
|
||||
^
|
||||
]
|
||||
|
||||
[important If you provide a streamable enumeration as the token ID type, this
|
||||
enables the alternate printing behavior described above. If you specify a
|
||||
particular value for the token parser, that value is printed as the expected
|
||||
value. So the diagnostic name for `bp::token_spec<"\\d+", 3>(42)` is
|
||||
`tok<3>(42)` but the name for `bp::token_spec<"\\d+",
|
||||
printable_tokens::foo>(42)` is just `42` (not `foo`).]
|
||||
|
||||
The takeaway here is that you should use a streamable enumeration for your ID
|
||||
type. It makes your code easier to read, and produces better diagnostics.
|
||||
|
||||
[heading Token caching]
|
||||
|
||||
Given that I told you earlier that we will make a sequence of tokens and
|
||||
backtrack within those tokens, you may be wondering where the tokens are
|
||||
stored. The _tok_v_ (the type created by the range adaptor _to_tok_) uses
|
||||
internal storage or user-provided external storage to store the tokens as they
|
||||
are generated. Here is an example of using external storage.
|
||||
|
||||
[tokens_caching_simple]
|
||||
|
||||
The cache could have been a `boost::container::small_vector<bp::token, N>`, or
|
||||
even a `static_vector` of appropriate size, to reduce or eliminate memory
|
||||
allocations.
|
||||
|
||||
Note the size of the cache after the parse; it still contains some tokens.
|
||||
This is a special case of a more general phenomenon: the token cache grows
|
||||
without bound when there are no expectation points. This is because, without
|
||||
expectation points, backtracking is unbounded (refer to the _expect_pts_
|
||||
section to see why). If you can go back arbitrarily far in order to backtrack,
|
||||
you need to be sure that there will be a token at the place you backtrack to.
|
||||
|
||||
However, if you use expectation points, the cache is trimmed. The prefix of
|
||||
tokens before the expectation point is erased from the token cache.
|
||||
|
||||
[tokens_caching_expectation_point]
|
||||
|
||||
Note the use of `std::ref()` to pass a reference to `cache`. This is
|
||||
necessary because _to_tok_ uses `std::bind_back()` (or a workalike in C++17
|
||||
mode). As with the other binders in `std`, it does not gracefully propagate
|
||||
bare lvalue references, so you have to use `std::ref()`.
|
||||
|
||||
[heading Lexing failures]
|
||||
|
||||
Parse failures that fail the top-level parse happen only at expectation
|
||||
points. Lexing failures that fail the top-level parse can happen at any point
|
||||
in the input. If there is no token regex that matches the current point of
|
||||
the input, we cannot continue to lex. Lexing failures are usually caused by
|
||||
bad input, or failure to specify the correct set of _tok_specs_ to cover all
|
||||
valid input. However, it may also be that you have written an impossible
|
||||
_tok_spec_. Consider this one.
|
||||
|
||||
constexpr auto bad_token = bp::token_spec<"foo", 0, int>;
|
||||
|
||||
This _tok_spec_ can never generate a valid token. It will match `"foo"` in
|
||||
the input, but then it will try to parse `"foo"` as an `int`, which is
|
||||
guaranteed to fail.
|
||||
|
||||
The takeaway here is that a lexing failure might be due to bad input, but it
|
||||
can also be the sign of a bug in one or more of your _tok_specs_.
|
||||
|
||||
[heading The token parsers]
|
||||
|
||||
Many of the parsers that work in character parsing do not work in token
|
||||
parsing, because they try to parse individual characters from the input.
|
||||
Token parsing only provides tokens, not characters. This table describes all
|
||||
the parsers compatible with token parsing.
|
||||
|
||||
[table_token_parsers_and_their_semantics]
|
||||
|
||||
[heading Directives and token parsing]
|
||||
|
||||
One directive that works in character parsing does not work in token parsing
|
||||
_emdash_ the argument form of _skip_. The argument to _skip_ is a new
|
||||
skipper, and this cannot be changed in the middle of tokenization. The set of
|
||||
tokens and their regexes are fixed at compile time. The nullary form of
|
||||
_skip_ works fine; all it does is re-enable skipping that has been turned off
|
||||
by _lexeme_.
|
||||
|
||||
[heading The token parsing API]
|
||||
|
||||
Not all the _p_ and _cbp_ overloads can do token parsing. In particular, the
|
||||
overloads that take a skipper are precluded, since the skipper must be built
|
||||
into the lexer itself (see the section above about whitespace handling for
|
||||
details).
|
||||
|
||||
[heading _ctre_ particulars]
|
||||
|
||||
There are a few details you might want to know about how _ctre_ works.
|
||||
|
||||
_ctre_ uses _pcre_ as its regex grammar.
|
||||
|
||||
"Maximum munch" appears not to be the way _ctre_ tokenizes input. For
|
||||
instance, if you have _tok_spec_ A that matches `"<=="` and _tok_spec_ B that
|
||||
matches `"<|>|<=|>=|==|!="`, the input characters `"<=="` will be tokenized as
|
||||
`"<=="` if the lexer includes `A | B`, but will be parsed as `"<"` followed by
|
||||
`"=="` if the lexer includes `B | A`.
|
||||
|
||||
_ctre_ uses `char32_t` for all its compile time strings. If you give it a
|
||||
regex string literal like `bp::token_spec<"foo", 0>` (that is, an array of
|
||||
`char`), it will be interpreted in one of two ways. By default, the `char`s
|
||||
are copied into an array of `char32_t`, unmodified. This is fine if you
|
||||
provide an ASCII regex, or a regex in a non-Unicode encoding. However, if you
|
||||
define `CTRE_STRING_IS_UTF8` before including `<boost/parser/lexer.hpp>`, the
|
||||
array of `char` will be interpreted as UTF-8, and will be transcoded to UTF-32
|
||||
before being stored in the array of `char32_t`. All the `charN_t` character
|
||||
types will be interpreted as UTF-N encoded, and will be transcoded to UTF-32
|
||||
if needed. `wchar_t` is taken to mean UTF-32 *even on Windows*. Again, all
|
||||
of this transcoding happens at compile time.
|
||||
|
||||
[heading Error handling details]
|
||||
|
||||
Error handling during token parsing mostly Just Works. That is, you don't
|
||||
need to know or do anything special just because you are parsing tokens.
|
||||
|
||||
However, the error reporting functions all operate at the level of character
|
||||
input, not tokens. The higher level functions provided in _err_fwd_hpp_ and
|
||||
_err_hpp_ (like `write_formatted_message()`) simply get the iterators to the
|
||||
underlying range of input before doing their work. The lower-level functions
|
||||
provided in _err_fwd_hpp_ and _err_hpp_ (like `find_line_position()`) do not.
|
||||
Each function's API documentation specifies whether or not it does this
|
||||
"normalization" to underlying iterators. If you use the lower-level API
|
||||
directly in your code, you can call one of the overloads of
|
||||
`normalize_iterators()` to get the underlying iterators in the token parsing
|
||||
case.
|
||||
|
||||
[endsect]
|
||||
|
||||
[section Memory Allocation]
|
||||
|
||||
_Parser_ seldom allocates memory. The exceptions to this are:
|
||||
|
||||
@@ -33,7 +33,7 @@ int main()
|
||||
std::cout << input << "\n";
|
||||
|
||||
//[ parsing_into_a_class_vec_of_strs
|
||||
constexpr auto uint_string = bp::uint_ >> bp::char_ >> bp::char_;
|
||||
constexpr auto uint_string = bp::uint_ >> +bp::char_;
|
||||
std::vector<std::string> vector_from_parse;
|
||||
if (parse(input, uint_string, bp::ws, vector_from_parse)) {
|
||||
std::cout << "That yields this vector of strings:\n";
|
||||
|
||||
@@ -31,9 +31,9 @@ struct logging_error_handler
|
||||
// and rethrow. Returning fail fails the top-level parse; returning
|
||||
// rethrow just re-throws the parse_error exception that got us here in
|
||||
// the first place.
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
bp::error_handler_result
|
||||
operator()(Iter first, Sentinel last, bp::parse_error<Iter> const & e) const
|
||||
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
|
||||
{
|
||||
bp::write_formatted_expectation_failure_error_message(
|
||||
ofs_, filename_, first, last, e);
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
|
||||
#if defined(BOOST_PARSER_DOXYGEN) || BOOST_PARSER_USE_CONCEPTS
|
||||
|
||||
#include <boost/parser/lexer_fwd.hpp>
|
||||
|
||||
#include <ranges>
|
||||
|
||||
|
||||
@@ -27,13 +29,19 @@ namespace boost { namespace parser {
|
||||
std::same_as<std::remove_cv_t<T>, char32_t>;
|
||||
|
||||
template<typename T>
|
||||
concept parsable_iter =
|
||||
std::forward_iterator<T> && code_unit<std::iter_value_t<T>>;
|
||||
concept token_iter = is_token_v<std::iter_value_t<T>>;
|
||||
|
||||
//[ parsable_range_like_concept
|
||||
template<typename T>
|
||||
concept parsable_range = std::ranges::forward_range<T> &&
|
||||
code_unit<std::ranges::range_value_t<T>>;
|
||||
concept parsable_iter =
|
||||
(std::forward_iterator<T> && code_unit<std::iter_value_t<T>>) ||
|
||||
token_iter<T>;
|
||||
|
||||
//[ parsable_range_concept
|
||||
template<typename T>
|
||||
concept parsable_range = (std::ranges::forward_range<T> &&
|
||||
code_unit<std::ranges::range_value_t<T>>) ||
|
||||
detail::is_tokens_view_v<T>;
|
||||
//]
|
||||
|
||||
template<typename T>
|
||||
concept parsable_pointer = std::is_pointer_v<std::remove_cvref_t<T>> &&
|
||||
@@ -41,7 +49,6 @@ namespace boost { namespace parser {
|
||||
|
||||
template<typename T>
|
||||
concept parsable_range_like = parsable_range<T> || parsable_pointer<T>;
|
||||
//]
|
||||
|
||||
template<typename T>
|
||||
concept range_like = std::ranges::range<T> || parsable_pointer<T>;
|
||||
@@ -58,7 +65,8 @@ namespace boost { namespace parser {
|
||||
std::declval<int &>(),
|
||||
std::declval<ErrorHandler const &>(),
|
||||
std::declval<detail::nope &>(),
|
||||
std::declval<detail::symbol_table_tries_t &>()));
|
||||
std::declval<detail::symbol_table_tries_t &>(),
|
||||
std::declval<detail::pending_symbol_table_operations_t &>()));
|
||||
|
||||
template<typename T, typename I, typename S, typename GlobalState>
|
||||
concept error_handler =
|
||||
|
||||
@@ -59,6 +59,12 @@
|
||||
also defined. */
|
||||
# define BOOST_PARSER_TRACE_TO_VS_OUTPUT
|
||||
|
||||
/** When lexing is enabled, each token contains its position within the
|
||||
underlying range.  To save a bit of space, an `unsigned int` is used for
|
||||
this. If you parse input sequences longer than 2^32-1 characters, define
|
||||
`BOOST_PARSER_TOKEN_POSITION_TYPE` to be a larger integral type. */
|
||||
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int
|
||||
|
||||
#else
|
||||
|
||||
# ifdef BOOST_PARSER_NO_RUNTIME_ASSERTIONS
|
||||
@@ -103,6 +109,10 @@
|
||||
# define BOOST_PARSER_MAX_AGGREGATE_SIZE 25
|
||||
#endif
|
||||
|
||||
#if !defined(BOOST_PARSER_TOKEN_POSITION_TYPE)
|
||||
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int
|
||||
#endif
|
||||
|
||||
// VS2019 and VS2017 need conditional constexpr in some places, even in C++17 mode.
|
||||
#if !defined(_MSC_VER) || 1930 <= _MSC_VER
|
||||
# define BOOST_PARSER_CONSTEXPR constexpr
|
||||
@@ -116,4 +126,18 @@
|
||||
# define BOOST_PARSER_TRACE_OSTREAM std::cout
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define BOOST_PARSER_DIAGNOSTIC_PUSH __pragma(warning(push))
|
||||
# define BOOST_PARSER_DIAGNOSTIC_POP __pragma(warning(pop))
|
||||
#elif defined(__clang_major__)
|
||||
# define BOOST_PARSER_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
|
||||
# define BOOST_PARSER_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
|
||||
#elif defined(__GNUC__)
|
||||
# define BOOST_PARSER_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
|
||||
# define BOOST_PARSER_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
# define BOOST_PARSER_DIAGNOSTIC_PUSH
|
||||
# define BOOST_PARSER_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -197,6 +197,31 @@ namespace boost { namespace parser { namespace detail::hl {
|
||||
}
|
||||
|
||||
|
||||
// fold_n
|
||||
|
||||
template<std::size_t I, std::size_t N>
|
||||
struct fold_n_dispatch
|
||||
{
|
||||
template<typename F, typename State>
|
||||
constexpr static auto call(State && s, F const & f)
|
||||
{
|
||||
if constexpr (I + 1 == N) {
|
||||
return f((State &&)s, llong<I>{});
|
||||
} else {
|
||||
return fold_n_dispatch<I + 1, N>::call(
|
||||
f((State &&)s, llong<I>{}), f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<std::size_t N, typename F, typename State>
|
||||
constexpr auto fold_n(State && s, F const & f)
|
||||
{
|
||||
static_assert(0 < N, "fold_n must operate on sequences of length >= 1");
|
||||
return hl::fold_n_dispatch<0, N>::call((State &&)s, (F &&)f);
|
||||
}
|
||||
|
||||
|
||||
// size
|
||||
|
||||
template<typename... Args>
|
||||
|
||||
@@ -355,6 +355,13 @@ namespace boost { namespace parser { namespace detail {
|
||||
std::ostream & os,
|
||||
int components = 0);
|
||||
|
||||
template<typename Context, typename TokenSpec, typename Expected>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
token_parser<TokenSpec, Expected> const & parser,
|
||||
std::ostream & os,
|
||||
int components = 0);
|
||||
|
||||
enum { trace_indent_factor = 2 };
|
||||
|
||||
inline void trace_indent(std::ostream & os, int indent)
|
||||
@@ -602,29 +609,19 @@ namespace boost { namespace parser { namespace detail {
|
||||
Context const & context,
|
||||
flags f,
|
||||
Attribute const & attr,
|
||||
std::string name) :
|
||||
os_(os),
|
||||
initial_first_(first),
|
||||
first_(first),
|
||||
last_(last),
|
||||
context_(context),
|
||||
flags_(f),
|
||||
attr_(attr),
|
||||
name_(std::move(name))
|
||||
{
|
||||
if (!detail::do_trace(flags_))
|
||||
return;
|
||||
detail::trace_prefix(os, first_, last_, context_, name_);
|
||||
}
|
||||
std::string name);
|
||||
~scoped_trace_t();
|
||||
// implemented in printing_impl.hpp
|
||||
|
||||
~scoped_trace_t()
|
||||
template<typename I, typename S>
|
||||
void impl(I initial_first, I first, S last)
|
||||
{
|
||||
if (!detail::do_trace(flags_))
|
||||
return;
|
||||
detail::trace_indent(os_, detail::_indent(context_));
|
||||
if (*context_.pass_) {
|
||||
os_ << "matched ";
|
||||
detail::trace_input(os_, initial_first_, first_);
|
||||
detail::trace_input(os_, initial_first, first);
|
||||
os_ << "\n";
|
||||
detail::print_attribute(
|
||||
os_,
|
||||
@@ -633,7 +630,7 @@ namespace boost { namespace parser { namespace detail {
|
||||
} else {
|
||||
os_ << "no match\n";
|
||||
}
|
||||
detail::trace_suffix(os_, first_, last_, context_, name_);
|
||||
detail::trace_suffix(os_, first, last, context_, name_);
|
||||
}
|
||||
|
||||
std::ostream & os_;
|
||||
|
||||
@@ -942,6 +942,100 @@ namespace boost { namespace parser { namespace detail {
|
||||
context, parser.or_parser_, os, components);
|
||||
}
|
||||
|
||||
#if defined(BOOST_PARSER_TOKEN_PARSER_HPP)
|
||||
|
||||
template<typename Context, typename TokenSpec, typename Expected>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
token_parser<TokenSpec, Expected> const & parser,
|
||||
std::ostream & os,
|
||||
int components)
|
||||
{
|
||||
constexpr bool do_print_value = requires { parser.expected_.value_; };
|
||||
|
||||
auto print_value = [&] {
|
||||
if constexpr (do_print_value) {
|
||||
if constexpr (std::ranges::range<
|
||||
decltype(parser.expected_.value_)>) {
|
||||
os << '"';
|
||||
for (auto c : parser.expected_.value_ | text::as_utf8) {
|
||||
detail::print_char(os, c);
|
||||
}
|
||||
os << '"';
|
||||
} else {
|
||||
detail::print(os, parser.expected_.value_);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if constexpr (requires {
|
||||
os << TokenSpec::id;
|
||||
} && std::is_enum_v<typename TokenSpec::id_type>) {
|
||||
if constexpr (do_print_value) {
|
||||
print_value();
|
||||
} else {
|
||||
os << TokenSpec::id;
|
||||
}
|
||||
} else {
|
||||
os << "tok<" << (int)TokenSpec::id << '>';
|
||||
if constexpr (do_print_value) {
|
||||
os << '(';
|
||||
print_value();
|
||||
os << ')';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<
|
||||
bool DoTrace,
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
typename Context,
|
||||
typename Attribute>
|
||||
scoped_trace_t<DoTrace, Iter, Sentinel, Context, Attribute>::scoped_trace_t(
|
||||
std::ostream & os,
|
||||
Iter & first,
|
||||
Sentinel last,
|
||||
Context const & context,
|
||||
flags f,
|
||||
Attribute const & attr,
|
||||
std::string name) :
|
||||
os_(os),
|
||||
initial_first_(first),
|
||||
first_(first),
|
||||
last_(last),
|
||||
context_(context),
|
||||
flags_(f),
|
||||
attr_(attr),
|
||||
name_(std::move(name))
|
||||
{
|
||||
if (!detail::do_trace(flags_))
|
||||
return;
|
||||
if constexpr (is_token_iter_v<Iter>) {
|
||||
detail::trace_prefix(
|
||||
os, first_.base(), first_.range_end(), context_, name_);
|
||||
} else {
|
||||
detail::trace_prefix(os, first_, last_, context_, name_);
|
||||
}
|
||||
}
|
||||
|
||||
template<
|
||||
bool DoTrace,
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
typename Context,
|
||||
typename Attribute>
|
||||
scoped_trace_t<DoTrace, Iter, Sentinel, Context, Attribute>::
|
||||
~scoped_trace_t()
|
||||
{
|
||||
if constexpr (is_token_iter_v<Iter>)
|
||||
impl(first_.range_begin(), first_.base(), first_.range_end());
|
||||
else
|
||||
impl(initial_first_, first_, last_);
|
||||
}
|
||||
|
||||
}}}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -693,12 +693,14 @@ namespace boost::parser::detail { namespace text {
|
||||
using T = detail::remove_cv_ref_t<R>;
|
||||
if constexpr (forward_range_v<T>) {
|
||||
auto unpacked =
|
||||
boost::parser::detail::text::unpack_iterator_and_sentinel(detail::begin(r), detail::end(r));
|
||||
boost::parser::detail::text::unpack_iterator_and_sentinel(
|
||||
detail::begin(r), detail::end(r));
|
||||
if constexpr (is_bounded_array_v<T>) {
|
||||
constexpr auto n = std::extent_v<T>;
|
||||
if (n && !r[n - 1])
|
||||
--unpacked.last;
|
||||
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(unpacked.first, unpacked.last);
|
||||
return BOOST_PARSER_DETAIL_TEXT_SUBRANGE(
|
||||
unpacked.first, unpacked.last);
|
||||
} else if constexpr (
|
||||
!std::is_same_v<decltype(unpacked.first), iterator_t<R>> ||
|
||||
!std::is_same_v<decltype(unpacked.last), sentinel_t<R>>) {
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace boost { namespace parser {
|
||||
}
|
||||
|
||||
/** Returns the `line_position` for `it`, counting lines from the
|
||||
beginning of the input `first`. */
|
||||
beginning of the input `first`. Requires non-token iterators. */
|
||||
template<typename Iter>
|
||||
line_position<Iter> find_line_position(Iter first, Iter it)
|
||||
{
|
||||
@@ -57,7 +57,7 @@ namespace boost { namespace parser {
|
||||
}
|
||||
|
||||
/** Returns the iterator to the end of the line in which `it` is
|
||||
found. */
|
||||
found. Requires non-token iterators. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
Iter find_line_end(Iter it, Sentinel last)
|
||||
{
|
||||
@@ -73,13 +73,16 @@ namespace boost { namespace parser {
|
||||
std::ostream & write_formatted_message(
|
||||
std::ostream & os,
|
||||
std::string_view filename,
|
||||
Iter first,
|
||||
Iter it,
|
||||
Sentinel last,
|
||||
Iter first_,
|
||||
Iter it_,
|
||||
Sentinel last_,
|
||||
std::string_view message,
|
||||
int64_t preferred_max_line_length,
|
||||
int64_t max_after_caret)
|
||||
{
|
||||
auto [first, it, last] =
|
||||
parser::normalize_iterators(first_, it_, last_);
|
||||
|
||||
if (!filename.empty())
|
||||
os << filename << ':';
|
||||
auto const position = parser::find_line_position(first, it);
|
||||
@@ -118,13 +121,15 @@ namespace boost { namespace parser {
|
||||
std::ostream & write_formatted_message(
|
||||
std::ostream & os,
|
||||
std::wstring_view filename,
|
||||
Iter first,
|
||||
Iter it,
|
||||
Sentinel last,
|
||||
Iter first_,
|
||||
Iter it_,
|
||||
Sentinel last_,
|
||||
std::string_view message,
|
||||
int64_t preferred_max_line_length,
|
||||
int64_t max_after_caret)
|
||||
{
|
||||
auto [first, it, last] =
|
||||
parser::normalize_iterators(first_, it_, last_);
|
||||
auto const r = filename | parser::detail::text::as_utf8;
|
||||
std::string s(r.begin(), r.end());
|
||||
return parser::write_formatted_message(
|
||||
@@ -139,23 +144,24 @@ namespace boost { namespace parser {
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
std::ostream & write_formatted_expectation_failure_error_message(
|
||||
std::ostream & os,
|
||||
std::string_view filename,
|
||||
Iter first,
|
||||
Sentinel last,
|
||||
parse_error<Iter> const & e,
|
||||
Iter first_,
|
||||
Sentinel last_,
|
||||
Exception<Iter> const & e,
|
||||
int64_t preferred_max_line_length,
|
||||
int64_t max_after_caret)
|
||||
{
|
||||
std::string message = "error: Expected ";
|
||||
message += e.what();
|
||||
auto [first, it, last] = parser::normalize_iterators(first_, e, last_);
|
||||
return parser::write_formatted_message(
|
||||
os,
|
||||
filename,
|
||||
first,
|
||||
e.iter,
|
||||
it,
|
||||
last,
|
||||
message,
|
||||
preferred_max_line_length,
|
||||
@@ -163,13 +169,13 @@ namespace boost { namespace parser {
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
std::ostream & write_formatted_expectation_failure_error_message(
|
||||
std::ostream & os,
|
||||
std::wstring_view filename,
|
||||
Iter first,
|
||||
Sentinel last,
|
||||
parse_error<Iter> const & e,
|
||||
Exception<Iter> const & e,
|
||||
int64_t preferred_max_line_length,
|
||||
int64_t max_after_caret)
|
||||
{
|
||||
@@ -180,6 +186,35 @@ namespace boost { namespace parser {
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace detail {
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators_impl(I first, I it, S last)
|
||||
{
|
||||
if constexpr (detail::is_token_iter_v<I>)
|
||||
return std::tuple(it.range_begin(), it.base(), it.range_end());
|
||||
else
|
||||
return std::tuple(first, it, last);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, I it, S last)
|
||||
{
|
||||
return detail::normalize_iterators_impl(first, it, last);
|
||||
}
|
||||
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, parse_error<I> e, S last)
|
||||
{
|
||||
return detail::normalize_iterators_impl(first, e.iter, last);
|
||||
}
|
||||
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, lex_error<I> e, S last)
|
||||
{
|
||||
return detail::normalize_iterators_impl(first, e.iter, last);
|
||||
}
|
||||
|
||||
/** An error handler that allows users to supply callbacks to handle the
|
||||
reporting of warnings and errors. The reporting of errors and/or
|
||||
warnings can be suppressed by supplying one or both
|
||||
@@ -211,9 +246,13 @@ namespace boost { namespace parser {
|
||||
filename_.assign(r.begin(), r.end());
|
||||
}
|
||||
#endif
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
template<class>
|
||||
class Exception>
|
||||
error_handler_result
|
||||
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const
|
||||
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
|
||||
{
|
||||
if (error_) {
|
||||
std::stringstream ss;
|
||||
@@ -260,13 +299,15 @@ namespace boost { namespace parser {
|
||||
std::string filename_;
|
||||
};
|
||||
|
||||
/** An error handler that just re-throws any exception generated by the
|
||||
parse. */
|
||||
struct rethrow_error_handler
|
||||
{
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
template<class>
|
||||
class Exception>
|
||||
error_handler_result
|
||||
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const
|
||||
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const
|
||||
{
|
||||
return error_handler_result::rethrow;
|
||||
}
|
||||
@@ -288,8 +329,6 @@ namespace boost { namespace parser {
|
||||
};
|
||||
|
||||
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
|
||||
/** An error handler that prints to the Visual Studio debugger via calls
|
||||
to `OutputDebugString()`. */
|
||||
struct vs_output_error_handler : stream_error_handler
|
||||
{
|
||||
vs_output_error_handler() :
|
||||
@@ -309,9 +348,9 @@ namespace boost { namespace parser {
|
||||
|
||||
// implementations
|
||||
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
error_handler_result default_error_handler::operator()(
|
||||
Iter first, Sentinel last, parse_error<Iter> const & e) const
|
||||
Iter first, Sentinel last, Exception<Iter> const & e) const
|
||||
{
|
||||
parser::write_formatted_expectation_failure_error_message(
|
||||
std::cerr, "", first, last, e);
|
||||
@@ -343,9 +382,9 @@ namespace boost { namespace parser {
|
||||
diagnose(kind, message, context, parser::_where(context).begin());
|
||||
}
|
||||
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
error_handler_result stream_error_handler::operator()(
|
||||
Iter first, Sentinel last, parse_error<Iter> const & e) const
|
||||
Iter first, Sentinel last, Exception<Iter> const & e) const
|
||||
{
|
||||
std::ostream * os = err_os_;
|
||||
if (!os)
|
||||
|
||||
@@ -24,10 +24,29 @@ namespace boost { namespace parser {
|
||||
template<typename Iter>
|
||||
struct parse_error : std::runtime_error
|
||||
{
|
||||
parse_error(Iter it, std::string const & msg) :
|
||||
runtime_error(msg), iter(it)
|
||||
parse_error(Iter it, std::string msg) :
|
||||
runtime_error(""), message(msg), iter(it)
|
||||
{}
|
||||
|
||||
char const * what() const noexcept override { return message.c_str(); }
|
||||
|
||||
std::string message;
|
||||
Iter iter;
|
||||
};
|
||||
|
||||
/** The exception thrown when a lexing error is encountered, consisting of
|
||||
an iterator to the point of failure, and a description of the value
|
||||
expected at the point of failure in `what()`. */
|
||||
template<typename Iter>
|
||||
struct lex_error : std::runtime_error
|
||||
{
|
||||
lex_error(Iter it, std::string msg) :
|
||||
runtime_error(""), message(msg), iter(it)
|
||||
{}
|
||||
|
||||
char const * what() const noexcept override { return message.c_str(); }
|
||||
|
||||
std::string message;
|
||||
Iter iter;
|
||||
};
|
||||
|
||||
@@ -42,7 +61,7 @@ namespace boost { namespace parser {
|
||||
};
|
||||
|
||||
/** Writes a formatted message (meaning prefixed with the file name, line,
|
||||
and column number) to `os`. */
|
||||
and column number) to `os`. Normalizes token iterators as needed. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
std::ostream & write_formatted_message(
|
||||
std::ostream & os,
|
||||
@@ -56,7 +75,8 @@ namespace boost { namespace parser {
|
||||
|
||||
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
|
||||
/** Writes a formatted message (meaning prefixed with the file name, line,
|
||||
and column number) to `os`. This overload is Windows-only. */
|
||||
and column number) to `os`. Normalizes token iterators as needed.
|
||||
This overload is Windows-only. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
std::ostream & write_formatted_message(
|
||||
std::ostream & os,
|
||||
@@ -70,32 +90,59 @@ namespace boost { namespace parser {
|
||||
#endif
|
||||
|
||||
/** Writes a formatted parse-expectation failure (meaning prefixed with
|
||||
the file name, line, and column number) to `os`. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
the file name, line, and column number) to `os`. Normalizes token
|
||||
iterators as needed. */
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
std::ostream & write_formatted_expectation_failure_error_message(
|
||||
std::ostream & os,
|
||||
std::string_view filename,
|
||||
Iter first,
|
||||
Sentinel last,
|
||||
parse_error<Iter> const & e,
|
||||
Exception<Iter> const & e,
|
||||
int64_t preferred_max_line_length = 80,
|
||||
int64_t max_after_caret = 40);
|
||||
|
||||
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
|
||||
/** Writes a formatted parse-expectation failure (meaning prefixed with
|
||||
the file name, line, and column number) to `os`. This overload is
|
||||
Windows-only. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
the file name, line, and column number) to `os`. Normalizes token
|
||||
iterators as needed. This overload is Windows-only. */
|
||||
template<typename Iter, typename Sentinel, template<class> class Exception>
|
||||
std::ostream & write_formatted_expectation_failure_error_message(
|
||||
std::ostream & os,
|
||||
std::wstring_view filename,
|
||||
Iter first,
|
||||
Sentinel last,
|
||||
parse_error<Iter> const & e,
|
||||
Exception<Iter> const & e,
|
||||
int64_t preferred_max_line_length = 80,
|
||||
int64_t max_after_caret = 40);
|
||||
#endif
|
||||
|
||||
/** Returns a tuple of three iterators (corresponding to `first`, `curr`,
|
||||
and `last`) that are suitable for use in the other error handling
|
||||
functions, many of which require iterators into the undelying sequence
|
||||
being parsed. For non-token parsing cases, this is effectively a
|
||||
no-op; the given iterators are simply returned as-is. */
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, I curr, S last);
|
||||
|
||||
/** Returns a tuple of three iterators (corresponding to `first`, the
|
||||
iterator captured in `e`, and `last`) that are suitable for use in the
|
||||
other error handling functions, many of which require iterators into
|
||||
the undelying sequence being parsed. For non-token parsing cases,
|
||||
this is effectively a no-op; the given iterators are simply returned
|
||||
as-is. */
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, parse_error<I> e, S last);
|
||||
|
||||
/** Returns a tuple of three iterators (corresponding to `first`, the
|
||||
iterator captured in `e`, and `last`) that are suitable for use in the
|
||||
other error handling functions, many of which require iterators into
|
||||
the undelying sequence being parsed. For non-token parsing cases,
|
||||
this is effectively a no-op; the given iterators are simply returned
|
||||
as-is. */
|
||||
template<typename I, typename S>
|
||||
auto normalize_iterators(I first, lex_error<I> e, S last);
|
||||
|
||||
/** The kinds of diagnostics that can be handled by an error handler. */
|
||||
enum class diagnostic_kind {
|
||||
error, /// An error diagnostic.
|
||||
@@ -109,12 +156,16 @@ namespace boost { namespace parser {
|
||||
{
|
||||
constexpr default_error_handler() = default;
|
||||
|
||||
/** Handles a `parse_error` exception thrown during parsing. A
|
||||
formatted parse-expectation failure is printed to `std::cerr`.
|
||||
Always returns `error_handler_result::fail`. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
error_handler_result operator()(
|
||||
Iter first, Sentinel last, parse_error<Iter> const & e) const;
|
||||
/** Handles a `parse_error` or `lex_error` exception thrown during
|
||||
parsing/lexing. A formatted parse-expectation failure is printed
|
||||
to `std::cerr`. Always returns `error_handler_result::fail`. */
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
template<class>
|
||||
class Exception>
|
||||
error_handler_result
|
||||
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const;
|
||||
|
||||
/** Prints `message` to `std::cerr`. The diagnostic is printed with
|
||||
the given `kind`, indicating the location as being at `it`. This
|
||||
@@ -191,9 +242,13 @@ namespace boost { namespace parser {
|
||||
formatted parse-expectation failure is printed to `*err_os_` when
|
||||
`err_os_` is non-null, or `std::cerr` otherwise. Always returns
|
||||
`error_handler_result::fail`. */
|
||||
template<typename Iter, typename Sentinel>
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
template<class>
|
||||
class Exception>
|
||||
error_handler_result
|
||||
operator()(Iter first, Sentinel last, parse_error<Iter> const & e) const;
|
||||
operator()(Iter first, Sentinel last, Exception<Iter> const & e) const;
|
||||
|
||||
/** Let `std::ostream * s = kind == diagnostic_kind::error : err_os_ :
|
||||
warn_os_`; prints `message` to `*s` when `s` is non-null, or
|
||||
@@ -225,6 +280,16 @@ namespace boost { namespace parser {
|
||||
std::ostream * warn_os_;
|
||||
};
|
||||
|
||||
/** An error handler that just re-throws any exception generated by the
|
||||
parse. */
|
||||
struct rethrow_error_handler;
|
||||
|
||||
#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN)
|
||||
/** An error handler that prints to the Visual Studio debugger via calls
|
||||
to `OutputDebugString()`. */
|
||||
struct vs_output_error_handler;
|
||||
#endif
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
||||
1159
include/boost/parser/lexer.hpp
Normal file
1159
include/boost/parser/lexer.hpp
Normal file
File diff suppressed because it is too large
Load Diff
36
include/boost/parser/lexer_fwd.hpp
Normal file
36
include/boost/parser/lexer_fwd.hpp
Normal file
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2024 T. Zachary Laine
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See
|
||||
// accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_PARSER_LEXER_FWD_HPP
|
||||
#define BOOST_PARSER_LEXER_FWD_HPP
|
||||
|
||||
#include <ranges>
|
||||
#include <vector>
|
||||
|
||||
namespace boost { namespace parser {
|
||||
|
||||
/** A `std::views`-compatible view that provides the tokens from the given
|
||||
contiguous range, using the given lexer and optional token cache. You
|
||||
should typically not need to use this type directly; use
|
||||
`boost::parser::to_tokens` instead. */
|
||||
template<
|
||||
std::ranges::contiguous_range V,
|
||||
typename Lexer,
|
||||
typename TokenCache = std::vector<typename Lexer::token_type>>
|
||||
requires std::ranges::view<V>
|
||||
struct tokens_view;
|
||||
|
||||
namespace detail {
|
||||
template<typename T>
|
||||
constexpr bool is_tokens_view_v = false;
|
||||
|
||||
template<typename V, typename Lexer, typename TokenCache>
|
||||
constexpr bool is_tokens_view_v<tokens_view<V, Lexer, TokenCache>> =
|
||||
true;
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <any>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
@@ -67,6 +68,32 @@ namespace boost { namespace parser {
|
||||
return BOOST_PARSER_SUBRANGE(ptr, detail::text::null_sentinel);
|
||||
}
|
||||
|
||||
/** The token ID used for whitespace tokens. */
|
||||
inline constexpr int ws_id = -1000000;
|
||||
|
||||
/** The token ID used for single-character tokens. */
|
||||
inline constexpr int character_id = -2000000;
|
||||
|
||||
#ifdef BOOST_PARSER_DOXYGEN
|
||||
|
||||
/** A type trait that evaluates to `true` iff `T` is a specialization of
|
||||
`boost::parser::token`. */
|
||||
template<typename T>
|
||||
constexpr bool is_token_v = detail::foo;
|
||||
|
||||
#else
|
||||
|
||||
template<typename CharType>
|
||||
struct token;
|
||||
|
||||
template<typename T>
|
||||
constexpr bool is_token_v = false;
|
||||
|
||||
template<typename CharType>
|
||||
constexpr bool is_token_v<token<CharType>> = true;
|
||||
|
||||
#endif
|
||||
|
||||
namespace detail {
|
||||
template<typename T>
|
||||
constexpr bool is_optional_v = enable_optional<T>;
|
||||
@@ -84,12 +111,24 @@ namespace boost { namespace parser {
|
||||
{
|
||||
std::any trie_;
|
||||
bool has_case_folded_;
|
||||
bool pending_operations_;
|
||||
};
|
||||
|
||||
using symbol_table_tries_t =
|
||||
std::map<void *, symbol_table_trie_element, std::less<void *>>;
|
||||
|
||||
using pending_symtab_ops_visitor = std::function<void()>;
|
||||
struct pending_symtab_ops_entry
|
||||
{
|
||||
pending_symtab_ops_visitor visit_;
|
||||
// Contains std::vector<detail::symbol_table_operation<T>> (T is
|
||||
// known to visit_).
|
||||
std::any ops_;
|
||||
};
|
||||
using pending_symbol_table_operations_t = std::map<
|
||||
void const *,
|
||||
pending_symtab_ops_entry,
|
||||
std::less<void const *>>;
|
||||
|
||||
template<
|
||||
bool DoTrace,
|
||||
bool UseCallbacks,
|
||||
@@ -103,7 +142,9 @@ namespace boost { namespace parser {
|
||||
int & indent,
|
||||
ErrorHandler const & error_handler,
|
||||
nope &,
|
||||
symbol_table_tries_t & symbol_table_tries) noexcept;
|
||||
symbol_table_tries_t & symbol_table_tries,
|
||||
pending_symbol_table_operations_t &
|
||||
pending_symbol_table_operations) noexcept;
|
||||
|
||||
struct skip_skipper;
|
||||
|
||||
@@ -132,6 +173,18 @@ namespace boost { namespace parser {
|
||||
{};
|
||||
struct upper_case_chars
|
||||
{};
|
||||
|
||||
struct any_token_value
|
||||
{
|
||||
template<typename T>
|
||||
bool matches_value(T) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename I, typename Context>
|
||||
struct scoped_lexeme;
|
||||
}
|
||||
|
||||
/** Repeats the application of another parser `p` of type `Parser`,
|
||||
@@ -413,6 +466,20 @@ namespace boost { namespace parser {
|
||||
template<typename T>
|
||||
struct float_parser;
|
||||
|
||||
/** A tag type used to represent a value type that is any specialization
|
||||
of `std::basic_string_view`. Which specialization is used depends on
|
||||
the input. */
|
||||
struct string_view_tag
|
||||
{};
|
||||
|
||||
/** Matches a token from the input with ID `TokenSpec::id`. Fails on any
|
||||
other input. The parse will also fail if `Expected` is anything but
|
||||
`detail::nope` (which it is by default), and `expected_.matches(attr)`
|
||||
is not `true` for the produced attribute `attr`. Used in token
|
||||
parsing only. */
|
||||
template<typename TokenSpec, typename Expected>
|
||||
struct token_parser;
|
||||
|
||||
/** Applies at most one of the parsers in `OrParser`. If `switch_value_`
|
||||
matches one or more of the values in the parsers in `OrParser`, the
|
||||
first such parser is applied, and the success or failure and attribute
|
||||
|
||||
305
include/boost/parser/token_parser.hpp
Normal file
305
include/boost/parser/token_parser.hpp
Normal file
@@ -0,0 +1,305 @@
|
||||
// Copyright (C) 2024 T. Zachary Laine
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See
|
||||
// accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef BOOST_PARSER_TOKEN_PARSER_HPP
|
||||
#define BOOST_PARSER_TOKEN_PARSER_HPP
|
||||
|
||||
#if !defined(BOOST_PARSER_PARSER_HPP) || !defined(BOOST_PARSER_LEXER_HPP)
|
||||
#error "token_parser.hpp must be included after lexer.hpp and parser.hpp."
|
||||
#endif
|
||||
|
||||
#include <boost/parser/parser_fwd.hpp>
|
||||
#include <boost/parser/concepts.hpp>
|
||||
#include <boost/parser/error_handling.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace boost { namespace parser {
|
||||
|
||||
namespace detail {
|
||||
template<typename AttributeType, typename CharType>
|
||||
std::optional<AttributeType> token_as(token<CharType> tok)
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<AttributeType>) {
|
||||
if (tok.has_long_double())
|
||||
return tok.get_long_double();
|
||||
return std::nullopt;
|
||||
} else if constexpr (std::is_integral_v<AttributeType>) {
|
||||
if (tok.has_long_long())
|
||||
return AttributeType(tok.get_long_long());
|
||||
return std::nullopt;
|
||||
} else {
|
||||
if (tok.has_string_view())
|
||||
return tok.get_string_view();
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Expected>
|
||||
struct token_with_value
|
||||
{
|
||||
explicit constexpr token_with_value(Expected value) :
|
||||
expected_(value)
|
||||
{}
|
||||
|
||||
template<typename T, typename Context>
|
||||
bool matches(T value, Context const & context) const
|
||||
{
|
||||
return value == detail::resolve(context, expected_);
|
||||
}
|
||||
|
||||
Expected expected_;
|
||||
};
|
||||
|
||||
template<typename Subrange>
|
||||
struct token_with_string_view
|
||||
{
|
||||
explicit constexpr token_with_string_view(Subrange subrange) :
|
||||
subrange_(subrange)
|
||||
{}
|
||||
|
||||
template<typename CharType, typename Context>
|
||||
bool matches(
|
||||
std::basic_string_view<CharType> value, Context const &) const
|
||||
{
|
||||
auto const value_cps =
|
||||
make_subrange<CharType>(value.begin(), value.end());
|
||||
auto const subrange_cps =
|
||||
make_subrange<CharType>(subrange_.begin(), subrange_.end());
|
||||
return std::ranges::equal(
|
||||
value_cps, subrange_cps, [](auto a, auto b) {
|
||||
return cast_char(a) == cast_char(b);
|
||||
});
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static auto cast_char(T c)
|
||||
{
|
||||
if constexpr (std::same_as<T, char>) {
|
||||
return (unsigned char)c;
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename CharType, typename I, typename S>
|
||||
static auto make_subrange(I f, S l)
|
||||
{
|
||||
auto subrange = BOOST_PARSER_SUBRANGE(f, l);
|
||||
if constexpr (std::is_same_v<CharType, char>) {
|
||||
return subrange;
|
||||
} else {
|
||||
return subrange | detail::text::as_utf32;
|
||||
}
|
||||
}
|
||||
|
||||
Subrange subrange_;
|
||||
};
|
||||
}
|
||||
|
||||
#ifndef BOOST_PARSER_DOXYGEN
|
||||
|
||||
template<typename TokenSpec, typename Expected>
|
||||
struct token_parser
|
||||
{
|
||||
using token_spec = TokenSpec;
|
||||
|
||||
template<typename Iter>
|
||||
using attribute_type = std::conditional_t<
|
||||
std::same_as<typename token_spec::value_type, string_view_tag>,
|
||||
std::basic_string_view<
|
||||
typename detail::iter_value_t<Iter>::char_type>,
|
||||
typename token_spec::value_type>;
|
||||
|
||||
constexpr token_parser() = default;
|
||||
constexpr token_parser(Expected expected) : expected_(expected) {}
|
||||
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
typename Context,
|
||||
typename SkipParser>
|
||||
auto call(
|
||||
Iter & first,
|
||||
Sentinel last,
|
||||
Context const & context,
|
||||
SkipParser const & skip,
|
||||
detail::flags flags,
|
||||
bool & success) const -> attribute_type<Iter>
|
||||
{
|
||||
attribute_type<Iter> retval;
|
||||
call(first, last, context, skip, flags, success, retval);
|
||||
return retval;
|
||||
}
|
||||
|
||||
template<
|
||||
typename Iter,
|
||||
typename Sentinel,
|
||||
typename Context,
|
||||
typename SkipParser,
|
||||
typename Attribute>
|
||||
void call(
|
||||
Iter & first,
|
||||
Sentinel last,
|
||||
Context const & context,
|
||||
SkipParser const & skip,
|
||||
detail::flags flags,
|
||||
bool & success,
|
||||
Attribute & retval) const
|
||||
{
|
||||
using value_type = std::remove_cvref_t<decltype(*first)>;
|
||||
static_assert(
|
||||
is_token_v<value_type>,
|
||||
"token_parser can only be used when parsing sequences of "
|
||||
"tokens.");
|
||||
|
||||
[[maybe_unused]] auto _ = detail::scoped_trace(
|
||||
*this, first, last, context, flags, retval);
|
||||
|
||||
if (first == last) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
|
||||
value_type const x = *first;
|
||||
if (x.id() != (int)token_spec::id) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr bool use_expected = !std::same_as<Expected, detail::nope>;
|
||||
if (use_expected || detail::gen_attrs(flags)) {
|
||||
auto opt_attr = detail::token_as<attribute_type<Iter>>(x);
|
||||
if constexpr (use_expected) {
|
||||
if (!opt_attr || !expected_.matches(*opt_attr, context)) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (detail::gen_attrs(flags))
|
||||
detail::assign(retval, *opt_attr);
|
||||
}
|
||||
|
||||
++first;
|
||||
}
|
||||
|
||||
/** Returns a `parser_interface` containing a `token_parser` that
|
||||
matches `value`. */
|
||||
template<typename T>
|
||||
requires(!parsable_range_like<T>)
|
||||
constexpr auto operator()(T value) const noexcept
|
||||
{
|
||||
BOOST_PARSER_ASSERT(
|
||||
(detail::is_nope_v<Expected> &&
|
||||
"If you're seeing this, you tried to chain calls on one of "
|
||||
"your token_spec's, like 'my_token_spec(id1)(id2)'. Quit "
|
||||
"it!'"));
|
||||
return parser_interface(
|
||||
token_parser<TokenSpec, detail::token_with_value<T>>(
|
||||
detail::token_with_value(std::move(value))));
|
||||
}
|
||||
|
||||
/** Returns a `parser_interface` containing a `token_parser` that
|
||||
matches the range `r`. If the token being matched during the
|
||||
parse has a `char_type` of `char8_t`, `char16_t`, or `char32_t`,
|
||||
the elements of `r` are transcoded from their presumed encoding to
|
||||
UTF-32 during the comparison. Otherwise, the character being
|
||||
matched is directly compared to the elements of `r`. */
|
||||
template<parsable_range_like R>
|
||||
constexpr auto operator()(R && r) const noexcept
|
||||
{
|
||||
BOOST_PARSER_ASSERT(
|
||||
((!std::is_rvalue_reference_v<R &&> ||
|
||||
!detail::is_range<detail::remove_cv_ref_t<R>>) &&
|
||||
"It looks like you tried to pass an rvalue range to "
|
||||
"token_spec(). Don't do that, or you'll end up with dangling "
|
||||
"references."));
|
||||
BOOST_PARSER_ASSERT(
|
||||
(detail::is_nope_v<Expected> &&
|
||||
"If you're seeing this, you tried to chain calls on "
|
||||
"token_spec, like 'token_spec(char-set)(char-set)'. Quit "
|
||||
"it!'"));
|
||||
auto expected =
|
||||
detail::token_with_string_view{make_expected_range((R &&)r)};
|
||||
return parser_interface(
|
||||
token_parser<token_spec, decltype(expected)>(expected));
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
static constexpr auto make_expected_range(R && r)
|
||||
{
|
||||
using T = detail::remove_cv_ref_t<R>;
|
||||
if constexpr (std::is_bounded_array_v<T>) {
|
||||
constexpr auto n = std::extent_v<T>;
|
||||
auto const offset = n && !r[n - 1] ? 1 : 0;
|
||||
return BOOST_PARSER_SUBRANGE(
|
||||
std::ranges::begin(r), std::ranges::end(r) - offset);
|
||||
} else {
|
||||
return BOOST_PARSER_SUBRANGE(
|
||||
std::ranges::begin(r), std::ranges::end(r));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Consider adding a special string_view-like type that can be
|
||||
// passed to the range overload above. It would be based on
|
||||
// adobe::name_t. When comparing it to a tokens' string_view, if it
|
||||
// matches, it would replace the token's string_view, so that
|
||||
// subsequent comparisons are O(1) in the length of the string.
|
||||
|
||||
Expected expected_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/** A variable template that defines a token parser associated with
|
||||
`boost::parser::token_spec_t<Regex, ID, ValueType, Base>`. This token
|
||||
parser can be used to specify a lexer, and may also be used in
|
||||
parsers. */
|
||||
template<
|
||||
ctll::fixed_string Regex,
|
||||
auto ID,
|
||||
typename ValueType = string_view_tag,
|
||||
int Base = 10>
|
||||
constexpr parser_interface token_spec{
|
||||
token_parser<token_spec_t<Regex, ID, ValueType, Base>, detail::nope>()};
|
||||
|
||||
#ifndef BOOST_PARSER_DOXYGEN
|
||||
|
||||
template<
|
||||
typename CharType,
|
||||
typename ID,
|
||||
ctll::fixed_string WsStr,
|
||||
ctll::fixed_string RegexStr,
|
||||
detail::nttp_array IDs,
|
||||
detail::nttp_array Specs>
|
||||
template<
|
||||
ctll::fixed_string RegexStr2,
|
||||
auto ID2,
|
||||
typename ValueType,
|
||||
int Base>
|
||||
constexpr auto
|
||||
lexer_t<CharType, ID, WsStr, RegexStr, IDs, Specs>::operator|(
|
||||
parser_interface<token_parser<
|
||||
token_spec_t<RegexStr2, ID2, ValueType, Base>,
|
||||
detail::nope>> const &) const
|
||||
{
|
||||
static_assert(
|
||||
std::same_as<ID, decltype(ID2)>,
|
||||
"All id_types must be the same for all token_specs.");
|
||||
constexpr auto new_regex =
|
||||
detail::wrap_escape_concat<regex_str, RegexStr2>();
|
||||
constexpr auto new_ids = IDs.template append<(int)ID2>();
|
||||
constexpr auto new_specs = Specs.template append<detail::parse_spec_for<
|
||||
token_spec_t<RegexStr2, ID2, ValueType, Base>>()>();
|
||||
return lexer_t<CharType, ID, WsStr, new_regex, new_ids, new_specs>{};
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
@@ -22,7 +22,7 @@ namespace boost::parser {
|
||||
std::declval<
|
||||
parse_context<false, false, I, S, default_error_handler>>(),
|
||||
ws,
|
||||
detail::default_flags(),
|
||||
flags(uint32_t(flags::gen_attrs) | uint32_t(flags::use_skip)),
|
||||
std::declval<bool &>()));
|
||||
template<typename R, typename Parser>
|
||||
using range_attr_t = attr_type<iterator_t<R>, sentinel_t<R>, Parser>;
|
||||
|
||||
10
index.html
Normal file
10
index.html
Normal file
@@ -0,0 +1,10 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Parser</title>
|
||||
<meta http-equiv="refresh" content="0; URL=../../doc/html/parser.html">
|
||||
</head>
|
||||
<body>
|
||||
Automatic redirection failed, please go to
|
||||
<a href="../../doc/html/parser.html">../../doc/html/parser.html</a>
|
||||
</body>
|
||||
</html>
|
||||
@@ -6,6 +6,23 @@ enable_testing()
|
||||
|
||||
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j4 -C ${CMAKE_CFG_INTDIR})
|
||||
|
||||
if (CXX_STD GREATER_EQUAL 20)
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
ctre
|
||||
URL https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/refs/heads/main/single-header/ctre-unicode.hpp
|
||||
DOWNLOAD_NO_EXTRACT true
|
||||
)
|
||||
|
||||
FetchContent_MakeAvailable(ctre)
|
||||
|
||||
set(ctre_include_dir ${CMAKE_BINARY_DIR}/_deps/ctre-src)
|
||||
add_library(ctre_single_header INTERFACE)
|
||||
target_include_directories(ctre_single_header INTERFACE ${ctre_include_dir})
|
||||
else()
|
||||
add_library(ctre_single_header INTERFACE)
|
||||
endif()
|
||||
|
||||
##################################################
|
||||
# Parser tests
|
||||
##################################################
|
||||
@@ -31,6 +48,7 @@ add_test(NAME parser_api COMMAND parser_api)
|
||||
|
||||
add_executable(
|
||||
compile_tests
|
||||
compile_include_lexer_parser.cpp
|
||||
compile_tests_main.cpp
|
||||
compile_attribute.cpp
|
||||
compile_seq_attribute.cpp
|
||||
@@ -39,12 +57,12 @@ add_executable(
|
||||
compile_all_t.cpp
|
||||
)
|
||||
set_property(TARGET compile_tests PROPERTY CXX_STANDARD ${CXX_STD})
|
||||
target_link_libraries(compile_tests parser boost)
|
||||
target_link_libraries(compile_tests parser boost ctre_single_header)
|
||||
|
||||
macro(add_test_executable name)
|
||||
add_executable(${name} ${name}.cpp)
|
||||
set_property(TARGET ${name} PROPERTY CXX_STANDARD ${CXX_STD})
|
||||
target_link_libraries(${name} parser boost ${link_flags})
|
||||
target_link_libraries(${name} parser boost ctre_single_header ${link_flags})
|
||||
if (MSVC)
|
||||
target_compile_options(${name} PRIVATE /source-charset:utf-8 /bigobj)
|
||||
elseif (USE_ASAN OR USE_UBSAN)
|
||||
@@ -82,6 +100,14 @@ add_test_executable(parser_seq_permutations_1)
|
||||
add_test_executable(parser_seq_permutations_2)
|
||||
add_test_executable(parser_or_permutations_1)
|
||||
add_test_executable(parser_or_permutations_2)
|
||||
if (CXX_STD GREATER_EQUAL 20)
|
||||
add_test_executable(lexer)
|
||||
add_test_executable(lexer_adobe_files)
|
||||
add_test_executable(lexer_and_parser)
|
||||
add_test_executable(lexer_and_parser_api)
|
||||
add_test_executable(lexer_and_parser_terminals)
|
||||
add_test_executable(lexer_and_parser_symbol_table)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
add_executable(vs_output_tracing tracing.cpp)
|
||||
|
||||
76
test/adobe_lexer.hpp
Normal file
76
test/adobe_lexer.hpp
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Copyright (C) 2024 T. Zachary Laine
|
||||
*
|
||||
* Distributed under the Boost Software License, Version 1.0. (See
|
||||
* accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
#ifndef BOOST_PARSER_TEST_ADOBE_LEXER
|
||||
#define BOOST_PARSER_TEST_ADOBE_LEXER
|
||||
|
||||
#include <boost/parser/parser.hpp>
|
||||
#include <boost/parser/lexer.hpp>
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
enum class adobe_tokens {
|
||||
keyword_true_false,
|
||||
keyword_empty,
|
||||
identifier,
|
||||
lead_comment,
|
||||
trail_comment,
|
||||
quoted_string,
|
||||
number,
|
||||
eq_op,
|
||||
rel_op,
|
||||
mul_op,
|
||||
define,
|
||||
or_,
|
||||
and_
|
||||
};
|
||||
|
||||
constexpr auto true_false =
|
||||
bp::token_spec<"true|false", adobe_tokens::keyword_true_false, bool>;
|
||||
constexpr auto empty = bp::token_spec<"empty", adobe_tokens::keyword_empty>;
|
||||
constexpr auto identifier =
|
||||
bp::token_spec<"[a-zA-Z]\\w*", adobe_tokens::identifier>;
|
||||
constexpr auto lead_comment = bp::token_spec<
|
||||
"\\/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*\\/",
|
||||
adobe_tokens::lead_comment>;
|
||||
constexpr auto trail_comment =
|
||||
bp::token_spec<"\\/\\/.*$", adobe_tokens::trail_comment>;
|
||||
constexpr auto quoted_string =
|
||||
bp::token_spec<"\\\"[^\\\"]*\\\"|'[^']*'", adobe_tokens::quoted_string>;
|
||||
constexpr auto number =
|
||||
bp::token_spec<"\\d+(?:\\.\\d*)?", adobe_tokens::number, double>;
|
||||
constexpr auto eq_op = bp::token_spec<"==|!=", adobe_tokens::eq_op>;
|
||||
constexpr auto define = bp::token_spec<"<==", adobe_tokens::define>;
|
||||
constexpr auto rel_op = bp::token_spec<"<|>|<=|>=", adobe_tokens::rel_op>;
|
||||
constexpr auto mul_op = bp::token_spec<"\\*|\\/|%", adobe_tokens::mul_op>;
|
||||
constexpr auto or_ = bp::token_spec<"\\|\\|", adobe_tokens::or_>;
|
||||
constexpr auto and_ = bp::token_spec<"&&", adobe_tokens::and_>;
|
||||
|
||||
constexpr auto adobe_lexer = bp::lexer<char, adobe_tokens> | true_false |
|
||||
empty | identifier | lead_comment | trail_comment |
|
||||
quoted_string | number | eq_op | define | rel_op |
|
||||
mul_op | or_ | and_ |
|
||||
bp::token_chars<
|
||||
'=',
|
||||
'+',
|
||||
'-',
|
||||
'!',
|
||||
'?',
|
||||
':',
|
||||
'.',
|
||||
',',
|
||||
'(',
|
||||
')',
|
||||
'[',
|
||||
']',
|
||||
'{',
|
||||
'}',
|
||||
'@',
|
||||
';'>;
|
||||
|
||||
#endif
|
||||
@@ -65,7 +65,7 @@ void compile_attribute_non_unicode()
|
||||
using attr_t = decltype(parse(null_term(r), parser));
|
||||
static_assert(std::is_same_v<attr_t, std::optional<char>>);
|
||||
static_assert(std::is_same_v<
|
||||
attribute_t<decltype(r), decltype(parser)>,
|
||||
attribute_t<decltype(null_term(r)), decltype(parser)>,
|
||||
char>);
|
||||
}
|
||||
{
|
||||
@@ -73,7 +73,7 @@ void compile_attribute_non_unicode()
|
||||
using attr_t = decltype(parse(null_term(r), parser));
|
||||
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
|
||||
static_assert(std::is_same_v<
|
||||
attribute_t<decltype(r), decltype(parser)>,
|
||||
attribute_t<decltype(null_term(r)), decltype(parser)>,
|
||||
std::string>);
|
||||
}
|
||||
{
|
||||
@@ -81,7 +81,7 @@ void compile_attribute_non_unicode()
|
||||
using attr_t = decltype(parse(null_term(r), parser));
|
||||
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
|
||||
static_assert(std::is_same_v<
|
||||
attribute_t<decltype(r), decltype(parser)>,
|
||||
attribute_t<decltype(null_term(r)), decltype(parser)>,
|
||||
std::string>);
|
||||
}
|
||||
{
|
||||
@@ -89,7 +89,7 @@ void compile_attribute_non_unicode()
|
||||
using attr_t = decltype(parse(null_term(r), parser));
|
||||
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
|
||||
static_assert(std::is_same_v<
|
||||
attribute_t<decltype(r), decltype(parser)>,
|
||||
attribute_t<decltype(null_term(r)), decltype(parser)>,
|
||||
std::string>);
|
||||
}
|
||||
{
|
||||
@@ -97,7 +97,7 @@ void compile_attribute_non_unicode()
|
||||
using attr_t = decltype(parse(null_term(r), parser));
|
||||
static_assert(std::is_same_v<attr_t, std::optional<std::string>>);
|
||||
static_assert(std::is_same_v<
|
||||
attribute_t<decltype(r), decltype(parser)>,
|
||||
attribute_t<decltype(null_term(r)), decltype(parser)>,
|
||||
std::string>);
|
||||
}
|
||||
}
|
||||
|
||||
10
test/compile_include_lexer_parser.cpp
Normal file
10
test/compile_include_lexer_parser.cpp
Normal file
@@ -0,0 +1,10 @@
|
||||
// Copyright (C) 2024 T. Zachary Laine
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See
|
||||
// accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
#include <boost/parser/config.hpp>
|
||||
#if BOOST_PARSER_USE_CONCEPTS
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#endif
|
||||
#include <boost/parser/parser.hpp>
|
||||
569
test/lexer.cpp
Normal file
569
test/lexer.cpp
Normal file
@@ -0,0 +1,569 @@
|
||||
/**
|
||||
* Copyright (C) 2024 T. Zachary Laine
|
||||
*
|
||||
* Distributed under the Boost Software License, Version 1.0. (See
|
||||
* accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
#define BOOST_PARSER_TESTING
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#include <boost/parser/parser.hpp>
|
||||
|
||||
#include <boost/parser/transcode_view.hpp>
|
||||
|
||||
#include "ill_formed.hpp"
|
||||
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include <deque>
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
enum class my_tokens { ws, foo, bar, baz };
|
||||
|
||||
int main()
|
||||
{
|
||||
// formation of token_specs
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, bp::string_view_tag, 10>
|
||||
token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", my_tokens::foo>;
|
||||
|
||||
bp::token_spec_t<"foo", my_tokens::foo, bp::string_view_tag, 10>
|
||||
token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"bar", my_tokens::bar>;
|
||||
|
||||
bp::token_spec_t<"bar", my_tokens::bar, bp::string_view_tag, 10>
|
||||
token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12, int, 2>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, int, 2> token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, bp::string_view_tag, 10>
|
||||
token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12, unsigned int, 8>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, unsigned int, 8> token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12, short>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, short, 10> token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12, float>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, float, 10> token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
{
|
||||
auto const token_spec = bp::token_spec<"foo", 12, double>;
|
||||
|
||||
bp::token_spec_t<"foo", 12, double, 10> token_spec_explicit;
|
||||
static_assert(std::same_as<
|
||||
decltype(token_spec.parser_)::token_spec,
|
||||
decltype(token_spec_explicit)>);
|
||||
}
|
||||
|
||||
// making lexers
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz>;
|
||||
|
||||
// +1 because of the 0-group
|
||||
static_assert(decltype(lexer)::size() == 3 + 1);
|
||||
static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
|
||||
}
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<'='>;
|
||||
|
||||
static_assert(decltype(lexer)::size() == 1 + 1);
|
||||
static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
|
||||
}
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<'='> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz>;
|
||||
|
||||
static_assert(decltype(lexer)::size() == 4 + 1);
|
||||
static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
|
||||
}
|
||||
{
|
||||
auto const lexer =
|
||||
bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>;
|
||||
|
||||
static_assert(decltype(lexer)::size() == 4 + 1);
|
||||
static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
|
||||
}
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<
|
||||
'=',
|
||||
'+',
|
||||
'-',
|
||||
'!',
|
||||
'?',
|
||||
':',
|
||||
'.',
|
||||
',',
|
||||
'(',
|
||||
')',
|
||||
'[',
|
||||
']',
|
||||
'{',
|
||||
'}',
|
||||
'@',
|
||||
';'>;
|
||||
|
||||
static_assert(decltype(lexer)::size() == 16 + 1);
|
||||
static_assert(std::same_as<decltype(lexer)::id_type, my_tokens>);
|
||||
}
|
||||
#if 0 // This is a test of whether the escapes work for every possible char
|
||||
// value accepted by detail::token_chars_spec. This takes a long time and
|
||||
// really only needs to happen once.
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens> | bp::token_chars<
|
||||
char(0),
|
||||
char(1),
|
||||
char(2),
|
||||
char(3),
|
||||
char(4),
|
||||
char(5),
|
||||
char(6),
|
||||
char(7),
|
||||
char(8),
|
||||
char(9),
|
||||
char(10),
|
||||
char(11),
|
||||
char(12),
|
||||
char(13),
|
||||
char(14),
|
||||
char(15),
|
||||
char(16),
|
||||
char(17),
|
||||
char(18),
|
||||
char(19),
|
||||
char(20),
|
||||
char(21),
|
||||
char(22),
|
||||
char(23),
|
||||
char(24),
|
||||
char(25),
|
||||
char(26),
|
||||
char(27),
|
||||
char(28),
|
||||
char(29),
|
||||
char(30),
|
||||
char(31),
|
||||
char(32),
|
||||
char(33),
|
||||
char(34),
|
||||
char(35),
|
||||
char(36),
|
||||
char(37),
|
||||
char(38),
|
||||
char(39),
|
||||
char(40),
|
||||
char(41),
|
||||
char(42),
|
||||
char(43),
|
||||
char(44),
|
||||
char(45),
|
||||
char(46),
|
||||
char(47),
|
||||
char(48),
|
||||
char(49),
|
||||
char(50),
|
||||
char(51),
|
||||
char(52),
|
||||
char(53),
|
||||
char(54),
|
||||
char(55),
|
||||
char(56),
|
||||
char(57),
|
||||
char(58),
|
||||
char(59),
|
||||
char(60),
|
||||
char(61),
|
||||
char(62),
|
||||
char(63),
|
||||
char(64),
|
||||
char(65),
|
||||
char(66),
|
||||
char(67),
|
||||
char(68),
|
||||
char(69),
|
||||
char(70),
|
||||
char(71),
|
||||
char(72),
|
||||
char(73),
|
||||
char(74),
|
||||
char(75),
|
||||
char(76),
|
||||
char(77),
|
||||
char(78),
|
||||
char(79),
|
||||
char(80),
|
||||
char(81),
|
||||
char(82),
|
||||
char(83),
|
||||
char(84),
|
||||
char(85),
|
||||
char(86),
|
||||
char(87),
|
||||
char(88),
|
||||
char(89),
|
||||
char(90),
|
||||
char(91),
|
||||
char(92),
|
||||
char(93),
|
||||
char(94),
|
||||
char(95),
|
||||
char(96),
|
||||
char(97),
|
||||
char(98),
|
||||
char(99),
|
||||
|
||||
char(100),
|
||||
char(101),
|
||||
char(103),
|
||||
char(102),
|
||||
char(104),
|
||||
char(105),
|
||||
char(106),
|
||||
char(107),
|
||||
char(108),
|
||||
char(109),
|
||||
char(110),
|
||||
char(111),
|
||||
char(112),
|
||||
char(113),
|
||||
char(114),
|
||||
char(115),
|
||||
char(116),
|
||||
char(117),
|
||||
char(118),
|
||||
char(119),
|
||||
char(120),
|
||||
char(121),
|
||||
char(122),
|
||||
char(123),
|
||||
char(124),
|
||||
char(125),
|
||||
char(126),
|
||||
char(127)>;
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
// Mixed UTFs.
|
||||
auto const lexer =
|
||||
bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<u"bar", my_tokens::bar> |
|
||||
bp::token_spec<U"baz", my_tokens::baz> | bp::token_chars<'='>;
|
||||
|
||||
// mutable vs. const token_views + mutable vs. const input views
|
||||
std::string input = "foo = bar";
|
||||
auto mr_mi = input | bp::to_tokens(lexer);
|
||||
auto const cr_mi = input | bp::to_tokens(lexer);
|
||||
|
||||
auto const const_input = input;
|
||||
auto mr_ci = input | bp::to_tokens(lexer);
|
||||
auto const cr_ci = input | bp::to_tokens(lexer);
|
||||
|
||||
using tok_t = bp::token<char>;
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)my_tokens::foo, 0, "foo"),
|
||||
tok_t(bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)my_tokens::bar, 0, "bar")};
|
||||
|
||||
int position = 0;
|
||||
|
||||
position = 0;
|
||||
for (auto tok : mr_mi) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : cr_mi) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : mr_ci) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : cr_ci) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// Check basic plumbing of connecting UTF inputs to CTRE.
|
||||
{
|
||||
auto const lexer =
|
||||
bp::lexer<char, my_tokens> | bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>;
|
||||
|
||||
std::string s = "foo = bar";
|
||||
using tok_t = bp::token<char>;
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)my_tokens::foo, 0, "foo"),
|
||||
tok_t(bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)my_tokens::bar, 0, "bar")};
|
||||
|
||||
auto const lexer8 = bp::lexer<char8_t, my_tokens> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> |
|
||||
bp::token_chars<'='>;
|
||||
|
||||
std::u8string u8s = u8"foo = bar";
|
||||
using tok8_t = bp::token<char8_t>;
|
||||
tok8_t const expected8[] = {
|
||||
tok8_t((int)my_tokens::foo, 0, u8"foo"),
|
||||
tok8_t(bp::character_id, 0, (long long)'='),
|
||||
tok8_t((int)my_tokens::bar, 0, u8"bar")};
|
||||
|
||||
auto const lexer16 = bp::lexer<char16_t, my_tokens> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> |
|
||||
bp::token_chars<'='>;
|
||||
|
||||
std::u16string u16s = u"foo = bar";
|
||||
using tok16_t = bp::token<char16_t>;
|
||||
tok16_t const expected16[] = {
|
||||
tok16_t((int)my_tokens::foo, 0, u"foo"),
|
||||
tok16_t(bp::character_id, 0, (long long)'='),
|
||||
tok16_t((int)my_tokens::bar, 0, u"bar")};
|
||||
|
||||
auto const lexer32 = bp::lexer<char32_t, my_tokens> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> |
|
||||
bp::token_chars<'='>;
|
||||
|
||||
std::u32string u32s = U"foo = bar";
|
||||
using tok32_t = bp::token<char32_t>;
|
||||
tok32_t const expected32[] = {
|
||||
tok32_t((int)my_tokens::foo, 0, U"foo"),
|
||||
tok32_t(bp::character_id, 0, (long long)'='),
|
||||
tok32_t((int)my_tokens::bar, 0, U"bar")};
|
||||
|
||||
|
||||
int position = 0;
|
||||
|
||||
position = 0;
|
||||
for (auto tok : s | bp::to_tokens(lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
static_assert(
|
||||
std::
|
||||
same_as<decltype(tok.get_string_view()), std::string_view>);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : u8s | bp::to_tokens(lexer8)) {
|
||||
BOOST_TEST(tok == expected8[position]);
|
||||
static_assert(std::same_as<
|
||||
decltype(tok.get_string_view()),
|
||||
std::u8string_view>);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : u16s | bp::to_tokens(lexer16)) {
|
||||
BOOST_TEST(tok == expected16[position]);
|
||||
static_assert(std::same_as<
|
||||
decltype(tok.get_string_view()),
|
||||
std::u16string_view>);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
|
||||
position = 0;
|
||||
for (auto tok : u32s | bp::to_tokens(lexer32)) {
|
||||
BOOST_TEST(tok == expected32[position]);
|
||||
static_assert(std::same_as<
|
||||
decltype(tok.get_string_view()),
|
||||
std::u32string_view>);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// no-ws lexer
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens, bp::no_ws> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> |
|
||||
bp::token_chars<'='>;
|
||||
|
||||
std::string s = "foo=bar";
|
||||
using tok_t = bp::token<char>;
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)my_tokens::foo, 0, "foo"),
|
||||
tok_t(bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)my_tokens::bar, 0, "bar")};
|
||||
|
||||
int position = 0;
|
||||
for (auto tok : s | bp::to_tokens(lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// ws-as-token lexers
|
||||
{
|
||||
auto const lexer = bp::lexer<char, my_tokens, bp::no_ws> |
|
||||
bp::token_spec<"\\s+", my_tokens::ws> |
|
||||
bp::token_spec<"foo", my_tokens::foo> |
|
||||
bp::token_spec<"bar", my_tokens::bar> |
|
||||
bp::token_spec<"baz", my_tokens::baz> |
|
||||
bp::token_chars<'='>;
|
||||
|
||||
std::string s = "foo = bar";
|
||||
using tok_t = bp::token<char>;
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)my_tokens::foo, 0, "foo"),
|
||||
tok_t((int)my_tokens::ws, 0, " "),
|
||||
tok_t(bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)my_tokens::ws, 0, " "),
|
||||
tok_t((int)my_tokens::bar, 0, "bar")};
|
||||
|
||||
int position = 0;
|
||||
for (auto tok : s | bp::to_tokens(lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// lexing errors
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
auto const lexer = bp::lexer<char, int> |
|
||||
bp::token_spec<"foo", 0, float> |
|
||||
bp::token_spec<"bar", 1, int> |
|
||||
bp::token_spec<"baz", 2, unsigned short> |
|
||||
bp::token_spec<"quux", 3, int, 8> |
|
||||
bp::token_spec<"next", 4, unsigned long long, 16>;
|
||||
|
||||
bool caught_exception = false;
|
||||
|
||||
caught_exception = false;
|
||||
try {
|
||||
for (auto tok : "foo" | bp::to_tokens(lexer)) {
|
||||
(void)tok;
|
||||
}
|
||||
} catch (std::exception const & e) {
|
||||
BOOST_TEST(e.what() == "32-bit floating-point number"sv);
|
||||
caught_exception = true;
|
||||
}
|
||||
BOOST_TEST(caught_exception);
|
||||
|
||||
caught_exception = false;
|
||||
try {
|
||||
for (auto tok : "bar" | bp::to_tokens(lexer)) {
|
||||
(void)tok;
|
||||
}
|
||||
} catch (std::exception const & e) {
|
||||
BOOST_TEST(e.what() == "32-bit signed integer"sv);
|
||||
caught_exception = true;
|
||||
}
|
||||
BOOST_TEST(caught_exception);
|
||||
|
||||
caught_exception = false;
|
||||
try {
|
||||
for (auto tok : "baz" | bp::to_tokens(lexer)) {
|
||||
(void)tok;
|
||||
}
|
||||
} catch (std::exception const & e) {
|
||||
BOOST_TEST(e.what() == "16-bit unsigned integer"sv);
|
||||
caught_exception = true;
|
||||
}
|
||||
BOOST_TEST(caught_exception);
|
||||
|
||||
caught_exception = false;
|
||||
try {
|
||||
for (auto tok : "quux" | bp::to_tokens(lexer)) {
|
||||
(void)tok;
|
||||
}
|
||||
} catch (std::exception const & e) {
|
||||
BOOST_TEST(e.what() == "32-bit, base-8 signed integer"sv);
|
||||
caught_exception = true;
|
||||
}
|
||||
BOOST_TEST(caught_exception);
|
||||
|
||||
caught_exception = false;
|
||||
try {
|
||||
for (auto tok : "next" | bp::to_tokens(lexer)) {
|
||||
(void)tok;
|
||||
}
|
||||
} catch (std::exception const & e) {
|
||||
BOOST_TEST(e.what() == "64-bit, base-16 unsigned integer"sv);
|
||||
caught_exception = true;
|
||||
}
|
||||
BOOST_TEST(caught_exception);
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
828
test/lexer_adobe_files.cpp
Normal file
828
test/lexer_adobe_files.cpp
Normal file
@@ -0,0 +1,828 @@
|
||||
/**
|
||||
* Copyright (C) 2024 T. Zachary Laine
|
||||
*
|
||||
* Distributed under the Boost Software License, Version 1.0. (See
|
||||
* accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
#define BOOST_PARSER_TESTING
|
||||
#include <boost/parser/lexer.hpp>
|
||||
|
||||
#include <boost/parser/transcode_view.hpp>
|
||||
|
||||
#include "ill_formed.hpp"
|
||||
#include "adobe_lexer.hpp"
|
||||
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include <deque>
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
int main()
|
||||
{
|
||||
{
|
||||
static_assert(decltype(adobe_lexer)::size() == 29 + 1);
|
||||
static_assert(
|
||||
std::same_as<decltype(adobe_lexer)::id_type, adobe_tokens>);
|
||||
|
||||
// tokens_view from adobe_lexer
|
||||
{
|
||||
char const input[] = R"(/*
|
||||
Copyright 2005-2007 Adobe Systems Incorporated
|
||||
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
|
||||
or a copy at http://stlab.adobe.com/licenses.html)
|
||||
*/
|
||||
|
||||
sheet alert_dialog
|
||||
{
|
||||
output:
|
||||
result <== { dummy_value: 42 };
|
||||
})";
|
||||
// first, just make a ctre range
|
||||
{
|
||||
std::string_view const expected[] = {
|
||||
R"(/*
|
||||
Copyright 2005-2007 Adobe Systems Incorporated
|
||||
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
|
||||
or a copy at http://stlab.adobe.com/licenses.html)
|
||||
*/)",
|
||||
R"(
|
||||
|
||||
)", R"(sheet)", R"( )", R"(alert_dialog)",
|
||||
R"(
|
||||
)", R"({)",
|
||||
R"(
|
||||
)", R"(output)", R"(:)",
|
||||
R"(
|
||||
)", R"(result)", R"( )", R"(<==)",
|
||||
R"( )", R"({)", R"( )", R"(dummy_value)",
|
||||
R"(:)", R"( )", R"(42)", R"( )",
|
||||
R"(})", R"(;)",
|
||||
R"(
|
||||
)", R"(})"};
|
||||
auto r = adobe_lexer.regex_range(input);
|
||||
int position = 0;
|
||||
for (auto subrange : r) {
|
||||
std::string_view sv = subrange;
|
||||
BOOST_TEST(sv == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
using tok_t = bp::token<char>;
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
|
||||
Copyright 2005-2007 Adobe Systems Incorporated
|
||||
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
|
||||
or a copy at http://stlab.adobe.com/licenses.html)
|
||||
*/)"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "sheet"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "alert_dialog"),
|
||||
tok_t(bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "output"),
|
||||
tok_t(bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "result"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dummy_value"),
|
||||
tok_t(bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)42.0),
|
||||
tok_t(bp::character_id, 0, (long long)'}'),
|
||||
tok_t(bp::character_id, 0, (long long)';'),
|
||||
tok_t(bp::character_id, 0, (long long)'}')};
|
||||
|
||||
// make a tokens_view
|
||||
{
|
||||
auto r = bp::tokens_view(input, adobe_lexer);
|
||||
int position = 0;
|
||||
for (auto tok : r) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// to_tokens range adaptor
|
||||
{
|
||||
int position = 0;
|
||||
for (auto tok : bp::to_tokens(input, adobe_lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
{
|
||||
std::string const input_str = input;
|
||||
int position = 0;
|
||||
for (auto tok : bp::to_tokens(input_str, adobe_lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
{
|
||||
int position = 0;
|
||||
for (auto tok :
|
||||
std::string(input) | bp::to_tokens(adobe_lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
// using external caches
|
||||
{
|
||||
std::vector<bp::token<char>> cache;
|
||||
int position = 0;
|
||||
for (auto tok :
|
||||
bp::to_tokens(input, adobe_lexer, std::ref(cache))) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
{
|
||||
boost::container::small_vector<bp::token<char>, 10> cache;
|
||||
int position = 0;
|
||||
for (auto tok :
|
||||
input | bp::to_tokens(adobe_lexer, std::ref(cache))) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
|
||||
{
|
||||
char const large_input[] = R"(/*
|
||||
Copyright 2005-2007 Adobe Systems Incorporated
|
||||
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
|
||||
or a copy at http://stlab.adobe.com/licenses.html)
|
||||
*/
|
||||
|
||||
sheet image_size
|
||||
{
|
||||
input:
|
||||
original_width : 1600;
|
||||
original_height : 1200;
|
||||
original_resolution : 300;
|
||||
|
||||
constant:
|
||||
original_doc_width : original_width / original_resolution;
|
||||
original_doc_height : original_height / original_resolution;
|
||||
|
||||
interface:
|
||||
resample : true;
|
||||
unlink constrain : true <== resample ? constrain : true;
|
||||
unlink scale_styles : true <== resample && constrain ? scale_styles : false;
|
||||
|
||||
resample_method : @bicubic;
|
||||
|
||||
dim_width_pixels : original_width <== resample ? round(dim_width_pixels) : original_width;
|
||||
dim_width_percent : 100 <== resample ? dim_width_percent : 100;
|
||||
|
||||
dim_height_pixels : original_height <== resample ? round(dim_height_pixels) : original_height;
|
||||
dim_height_percent : 100 <== resample ? dim_height_percent : 100;
|
||||
|
||||
doc_width_inches : original_doc_width;
|
||||
doc_width_percent : 100;
|
||||
|
||||
/*
|
||||
Resolution must be initialized before width and height inches to allow proportions
|
||||
to be constrained.
|
||||
*/
|
||||
doc_resolution : original_resolution;
|
||||
|
||||
doc_height_inches : original_doc_height;
|
||||
doc_height_percent : 100;
|
||||
|
||||
auto_quality : @draft;
|
||||
|
||||
screen_lpi; // initialized from doc_resolution
|
||||
|
||||
logic:
|
||||
relate {
|
||||
doc_width_inches <== doc_width_percent * original_doc_width / 100;
|
||||
doc_width_percent <== doc_width_inches * 100 / original_doc_width;
|
||||
}
|
||||
|
||||
relate {
|
||||
doc_height_inches <== doc_height_percent * original_doc_height / 100;
|
||||
doc_height_percent <== doc_height_inches * 100 / original_doc_height;
|
||||
}
|
||||
|
||||
relate {
|
||||
screen_lpi <== doc_resolution / (auto_quality == @draft ? 1 : (auto_quality == @good ? 1.5 : 2.0));
|
||||
doc_resolution <== screen_lpi * (auto_quality == @draft ? 1 : (auto_quality == @good ? 1.5 : 2.0));
|
||||
}
|
||||
|
||||
when (resample) relate {
|
||||
dim_width_pixels <== dim_width_percent * original_width / 100;
|
||||
dim_width_percent <== dim_width_pixels * 100 / original_width;
|
||||
}
|
||||
|
||||
when (resample) relate {
|
||||
dim_height_pixels <== dim_height_percent * original_height / 100;
|
||||
dim_height_percent <== dim_height_pixels * 100 / original_height;
|
||||
}
|
||||
|
||||
when (resample) relate {
|
||||
doc_width_inches <== dim_width_pixels / doc_resolution;
|
||||
dim_width_pixels <== doc_width_inches * doc_resolution;
|
||||
doc_resolution <== dim_width_pixels / doc_width_inches;
|
||||
}
|
||||
|
||||
when (resample) relate {
|
||||
doc_height_inches <== dim_height_pixels / doc_resolution;
|
||||
dim_height_pixels <== doc_height_inches * doc_resolution;
|
||||
doc_resolution <== dim_height_pixels / doc_height_inches;
|
||||
}
|
||||
|
||||
when (!resample) relate {
|
||||
doc_resolution <== original_width / doc_width_inches;
|
||||
doc_width_inches <== original_width / doc_resolution;
|
||||
}
|
||||
|
||||
when (!resample) relate {
|
||||
doc_resolution <== original_height / doc_height_inches;
|
||||
doc_height_inches <== original_height / doc_resolution;
|
||||
}
|
||||
|
||||
when (constrain && resample) relate {
|
||||
dim_width_percent <== dim_height_percent;
|
||||
dim_height_percent <== dim_width_percent;
|
||||
}
|
||||
|
||||
output:
|
||||
byte_count <== dim_width_pixels * dim_height_pixels * 32;
|
||||
|
||||
result <== resample ? {
|
||||
command: @resize_image,
|
||||
width: dim_width_pixels,
|
||||
height: dim_height_pixels,
|
||||
resolution: doc_resolution,
|
||||
scale_styles: scale_styles,
|
||||
resample_method: resample_method
|
||||
} : {
|
||||
command: @set_resolution,
|
||||
resolution: doc_resolution
|
||||
};
|
||||
|
||||
invariant:
|
||||
width_max <== dim_width_pixels <= 300000;
|
||||
height_max <== dim_height_pixels <= 300000;
|
||||
}
|
||||
)";
|
||||
|
||||
tok_t const expected[] = {
|
||||
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
|
||||
Copyright 2005-2007 Adobe Systems Incorporated
|
||||
Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
|
||||
or a copy at http://stlab.adobe.com/licenses.html)
|
||||
*/)"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "sheet"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "image_size"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "input"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1600.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1200.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)300.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "constant"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "original_doc_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_doc_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "interface"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "unlink"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "unlink"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::keyword_true_false, 0, 1ll),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)adobe_tokens::and_, 0, "&&"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::keyword_true_false, 0, 0ll),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "bicubic"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "round"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "round"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "original_doc_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::lead_comment, 0, R"(/*
|
||||
Resolution must be initialized before width and height inches to allow proportions
|
||||
to be constrained.
|
||||
*/)"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_doc_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "draft"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::trail_comment,
|
||||
0,
|
||||
"// initialized from doc_resolution"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "logic"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "original_doc_width"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_width_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "original_doc_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_doc_height"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier,
|
||||
0,
|
||||
"original_doc_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
|
||||
tok_t((int)adobe_tokens::eq_op, 0, "=="),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "draft"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
|
||||
tok_t((int)adobe_tokens::eq_op, 0, "=="),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "good"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1.5),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)2.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "screen_lpi"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
|
||||
tok_t((int)adobe_tokens::eq_op, 0, "=="),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "draft"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "auto_quality"),
|
||||
tok_t((int)adobe_tokens::eq_op, 0, "=="),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "good"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)1.5),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)2.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)100.0),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)bp::character_id, 0, (long long)'!'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_width_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_width"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)bp::character_id, 0, (long long)'!'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "doc_height_inches"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "original_height"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "/"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "when"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'('),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "constrain"),
|
||||
tok_t((int)adobe_tokens::and_, 0, "&&"),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)')'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "relate"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_percent"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_width_percent"),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "output"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "byte_count"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::mul_op, 0, "*"),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)32.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "result"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'?'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "command"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resize_image"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "width"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "height"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "scale_styles"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resample_method"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'{'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "command"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'@'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "set_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)','),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "doc_resolution"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}'),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "invariant"),
|
||||
tok_t((int)bp::character_id, 0, (long long)':'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "width_max"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "dim_width_pixels"),
|
||||
tok_t((int)adobe_tokens::rel_op, 0, "<"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)300000.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)adobe_tokens::identifier, 0, "height_max"),
|
||||
tok_t((int)adobe_tokens::define, 0, "<=="),
|
||||
tok_t(
|
||||
(int)adobe_tokens::identifier, 0, "dim_height_pixels"),
|
||||
tok_t((int)adobe_tokens::rel_op, 0, "<"),
|
||||
tok_t((int)bp::character_id, 0, (long long)'='),
|
||||
tok_t((int)adobe_tokens::number, 0, (long double)300000.0),
|
||||
tok_t((int)bp::character_id, 0, (long long)';'),
|
||||
tok_t((int)bp::character_id, 0, (long long)'}')};
|
||||
|
||||
int position = 0;
|
||||
for (auto tok :
|
||||
std::string(large_input) | bp::to_tokens(adobe_lexer)) {
|
||||
BOOST_TEST(tok == expected[position]);
|
||||
if (tok != expected[position]) {
|
||||
std::cout << "At pos=" << position << ": got " << tok
|
||||
<< " expected " << expected[position] << "\n";
|
||||
}
|
||||
++position;
|
||||
}
|
||||
BOOST_TEST(position == (int)std::size(expected));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
237
test/lexer_and_parser.cpp
Normal file
237
test/lexer_and_parser.cpp
Normal file
@@ -0,0 +1,237 @@
|
||||
/**
|
||||
* Copyright (C) 2024 T. Zachary Laine
|
||||
*
|
||||
* Distributed under the Boost Software License, Version 1.0. (See
|
||||
* accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
|
||||
#define BOOST_PARSER_TESTING
|
||||
//[ tokens_basics_headers
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#include <boost/parser/parser.hpp>
|
||||
//]
|
||||
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
|
||||
#include "adobe_lexer.hpp"
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
int main()
|
||||
{
|
||||
// Minimal test; just instantiate the member functions, without involving
|
||||
// the parse() API.
|
||||
{
|
||||
bp::token<char> tokens[1] = {};
|
||||
auto p = bp::token_spec<"12", 12, int>;
|
||||
auto first = std::begin(tokens);
|
||||
auto const last = std::end(tokens);
|
||||
|
||||
bp::detail::nope globals;
|
||||
bp::default_error_handler error_handler;
|
||||
|
||||
// From parse_impl().
|
||||
bool success = true;
|
||||
int trace_indent = 0;
|
||||
bp::detail::symbol_table_tries_t symbol_table_tries;
|
||||
bp::detail::pending_symbol_table_operations_t
|
||||
pending_symbol_table_operations;
|
||||
bp::detail::scoped_apply_pending_symbol_table_operations apply_pending(
|
||||
pending_symbol_table_operations);
|
||||
auto context = bp::detail::make_context<false, false>(
|
||||
first,
|
||||
last,
|
||||
success,
|
||||
trace_indent,
|
||||
error_handler,
|
||||
globals,
|
||||
symbol_table_tries,
|
||||
pending_symbol_table_operations);
|
||||
auto const flags = bp::detail::flags::gen_attrs;
|
||||
|
||||
std::optional<int> result =
|
||||
p(first, last, context, bp::ws, flags, success);
|
||||
(void)result;
|
||||
}
|
||||
|
||||
// Minimal tests of building parsers from token_parser and token_spec.
|
||||
{
|
||||
auto parser1 = true_false(true);
|
||||
auto parser2 = true_false(false);
|
||||
(void)parser1;
|
||||
(void)parser2;
|
||||
}
|
||||
{
|
||||
auto parser = identifier("foo") >> '=' >> true_false >> ';';
|
||||
(void)parser;
|
||||
}
|
||||
|
||||
// Minimal tests of using a lexer and parser together.
|
||||
{
|
||||
auto parser = identifier("foo") >> '=' >> true_false >> ';';
|
||||
auto r = "some input" | bp::to_tokens(adobe_lexer);
|
||||
auto result = bp::parse(r, parser);
|
||||
BOOST_TEST(!result);
|
||||
|
||||
static_assert(!std::same_as<
|
||||
std::remove_cvref_t<
|
||||
decltype(bp::detail::tokens_view_or_nope(r))>,
|
||||
bp::detail::nope>);
|
||||
|
||||
auto const & cr = r;
|
||||
static_assert(!std::same_as<
|
||||
std::remove_cvref_t<
|
||||
decltype(bp::detail::tokens_view_or_nope(cr))>,
|
||||
bp::detail::nope>);
|
||||
}
|
||||
{
|
||||
auto parser = identifier >> '=' >> true_false >> ';';
|
||||
auto r = "foo = false;" | bp::to_tokens(adobe_lexer);
|
||||
auto result = bp::parse(r, parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
}
|
||||
|
||||
// Test the use of an external token cache.
|
||||
{
|
||||
auto parser = identifier >> '=' >> true_false >> ';';
|
||||
std::vector<bp::token<char>> cache;
|
||||
auto r = "foo = false;" | bp::to_tokens(adobe_lexer, std::ref(cache));
|
||||
auto result = bp::parse(r, parser);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
BOOST_TEST(cache.size() == 4u);
|
||||
}
|
||||
|
||||
// Test the clearing of the token cache at expectation points.
|
||||
{
|
||||
auto parser = identifier >> '=' > true_false >> ';';
|
||||
std::vector<bp::token<char>> cache;
|
||||
auto r = "foo = false;" | bp::to_tokens(adobe_lexer, std::ref(cache));
|
||||
auto result = bp::parse(r, parser);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
BOOST_TEST(cache.size() == 2u);
|
||||
}
|
||||
|
||||
// doc examples
|
||||
// clang-format off
|
||||
{
|
||||
//[ tokens_basics_lexer
|
||||
auto const foo = bp::token_spec<"foo", 0>;
|
||||
auto const bar = bp::token_spec<"b.r", 1>;
|
||||
auto const baz = bp::token_spec<"b.z", 2>;
|
||||
|
||||
auto const lexer = bp::lexer<char, int> | foo | bar | baz;
|
||||
//]
|
||||
|
||||
//[ tokens_basics_input_range
|
||||
auto r = "foobazbar" | bp::to_tokens(lexer);
|
||||
//]
|
||||
|
||||
//[ tokens_basics_parser
|
||||
auto parser = foo >> baz >> bar;
|
||||
//]
|
||||
|
||||
//[ tokens_basics_parse
|
||||
auto result = bp::parse(r, parser);
|
||||
assert(result);
|
||||
assert(std::get<0>(*result) == "foo");
|
||||
assert(std::get<1>(*result) == "baz");
|
||||
assert(std::get<2>(*result) == "bar");
|
||||
//]
|
||||
}
|
||||
|
||||
{
|
||||
//[ tokens_attrs
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
constexpr auto number = bp::token_spec<"\\d+(?:\\.\\d*)?", 2, double>;
|
||||
//]
|
||||
(void)true_false;
|
||||
(void)identifier;
|
||||
(void)number;
|
||||
}
|
||||
|
||||
{
|
||||
//[ tokens_token_char
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
|
||||
constexpr auto lexer =
|
||||
bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;
|
||||
|
||||
auto parser = identifier >> '=' >> true_false >> ';';
|
||||
auto r = "foo = false;" | bp::to_tokens(lexer);
|
||||
auto result = bp::parse(r, parser);
|
||||
assert(result);
|
||||
assert(std::get<0>(*result) == "foo");
|
||||
assert(std::get<1>(*result) == false);
|
||||
//]
|
||||
}
|
||||
|
||||
{
|
||||
//[ tokens_caching_simple
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
|
||||
constexpr auto lexer =
|
||||
bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;
|
||||
|
||||
auto parser = identifier >> '=' >> true_false >> ';';
|
||||
std::vector<bp::token<char>> cache;
|
||||
auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache));
|
||||
auto result = bp::parse(r, parser);
|
||||
assert(result);
|
||||
assert(std::get<0>(*result) == "foo");
|
||||
assert(std::get<1>(*result) == false);
|
||||
assert(cache.size() == 4u);
|
||||
//]
|
||||
}
|
||||
|
||||
{
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
|
||||
constexpr auto lexer =
|
||||
bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;
|
||||
|
||||
//[ tokens_caching_expectation_point
|
||||
auto parser = identifier >> '=' > true_false >> ';';
|
||||
std::vector<bp::token<char>> cache;
|
||||
auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache));
|
||||
auto result = bp::parse(r, parser);
|
||||
assert(result);
|
||||
assert(std::get<0>(*result) == "foo");
|
||||
assert(std::get<1>(*result) == false);
|
||||
assert(cache.size() == 2u);
|
||||
//]
|
||||
}
|
||||
|
||||
{
|
||||
//[ tokens_string_in_character_vs_token_parsing
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
|
||||
constexpr auto lexer =
|
||||
bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>;
|
||||
|
||||
auto parser = bp::string("=;");
|
||||
|
||||
// NOTE: Character parsing here.
|
||||
auto character_parse_result = bp::parse("=;", parser);
|
||||
assert(character_parse_result);
|
||||
assert(*character_parse_result == "=;");
|
||||
|
||||
// NOTE: Token parsing here.
|
||||
auto token_parse_result = bp::parse("=;" | bp::to_tokens(lexer), parser);
|
||||
assert(!token_parse_result);
|
||||
//]
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
184
test/lexer_and_parser_api.cpp
Normal file
184
test/lexer_and_parser_api.cpp
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Copyright (C) 2024 T. Zachary Laine
|
||||
*
|
||||
* Distributed under the Boost Software License, Version 1.0. (See
|
||||
* accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
|
||||
#define BOOST_PARSER_TESTING
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#include <boost/parser/parser.hpp>
|
||||
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
|
||||
struct tf_tag
|
||||
{};
|
||||
struct id_tag
|
||||
{};
|
||||
constexpr bp::callback_rule<tf_tag, bool> callback_true_false = "";
|
||||
constexpr bp::callback_rule<id_tag, std::string_view> callback_identifier = "";
|
||||
constexpr auto callback_true_false_def = true_false;
|
||||
constexpr auto callback_identifier_def = identifier;
|
||||
BOOST_PARSER_DEFINE_RULES(callback_true_false, callback_identifier);
|
||||
|
||||
struct callbacks
|
||||
{
|
||||
void operator()(id_tag, std::string_view sv) const { sv_ = sv; }
|
||||
void operator()(tf_tag, bool b) const { b_ = b; }
|
||||
std::string_view & sv_;
|
||||
bool & b_;
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
auto assign_bool_parser = identifier >> '=' >> true_false >> ';';
|
||||
auto assign_bool_no_semi_parser = identifier >> '=' >> true_false;
|
||||
|
||||
constexpr auto lexer = bp::lexer<char, int> | true_false | identifier |
|
||||
bp::token_chars<'=', ';'>;
|
||||
auto r = "foo = false;" | bp::to_tokens(lexer);
|
||||
|
||||
// prefix_parse() w/attr
|
||||
{
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
std::tuple<std::string_view, bool> result;
|
||||
auto success = bp::prefix_parse(f, l, assign_bool_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == "foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
}
|
||||
{
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
std::tuple<std::string_view, bool> result;
|
||||
auto success = bp::prefix_parse(f, l, assign_bool_no_semi_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == "foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
BOOST_TEST(f != l);
|
||||
}
|
||||
|
||||
// parse() w/attr
|
||||
{
|
||||
std::tuple<std::string_view, bool> result;
|
||||
auto success = bp::parse(r, assign_bool_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == "foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
}
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char8_t, int> | true_false |
|
||||
identifier | bp::token_chars<'=', ';'>;
|
||||
auto r8 = u8"foo = false;" | bp::to_tokens(lexer);
|
||||
|
||||
std::tuple<std::u8string_view, bool> result;
|
||||
auto success = bp::parse(r8, assign_bool_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == u8"foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
}
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char16_t, int> | true_false |
|
||||
identifier | bp::token_chars<'=', ';'>;
|
||||
auto r16 = u"foo = false;" | bp::to_tokens(lexer);
|
||||
|
||||
std::tuple<std::u16string_view, bool> result;
|
||||
auto success = bp::parse(r16, assign_bool_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == u"foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
}
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char32_t, int> | true_false |
|
||||
identifier | bp::token_chars<'=', ';'>;
|
||||
auto r32 = U"foo = false;" | bp::to_tokens(lexer);
|
||||
|
||||
std::tuple<std::u32string_view, bool> result;
|
||||
auto success = bp::parse(r32, assign_bool_parser, result);
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(std::get<0>(result) == U"foo");
|
||||
BOOST_TEST(std::get<1>(result) == false);
|
||||
}
|
||||
|
||||
// prefix_parse() no attr
|
||||
{
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
auto result = bp::prefix_parse(f, l, assign_bool_parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
}
|
||||
{
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
auto result = bp::prefix_parse(f, l, assign_bool_no_semi_parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
BOOST_TEST(f != l);
|
||||
}
|
||||
|
||||
// parse() no attr
|
||||
{
|
||||
auto result = bp::parse(r, assign_bool_parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(std::get<0>(*result) == "foo");
|
||||
BOOST_TEST(std::get<1>(*result) == false);
|
||||
}
|
||||
|
||||
// callback_prefix_parse()
|
||||
{
|
||||
auto assign_bool_parser =
|
||||
callback_identifier >> '=' >> callback_true_false >> ';';
|
||||
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
std::string_view sv;
|
||||
bool b = false;
|
||||
auto success = bp::callback_prefix_parse(
|
||||
f, l, assign_bool_parser, callbacks{sv, b});
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(sv == "foo");
|
||||
BOOST_TEST(b == false);
|
||||
}
|
||||
{
|
||||
auto assign_bool_no_semi_parser =
|
||||
callback_identifier >> '=' >> callback_true_false;
|
||||
|
||||
auto f = r.begin();
|
||||
auto const l = r.end();
|
||||
std::string_view sv;
|
||||
bool b = false;
|
||||
auto success = bp::callback_prefix_parse(
|
||||
f, l, assign_bool_no_semi_parser, callbacks{sv, b});
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(sv == "foo");
|
||||
BOOST_TEST(b == false);
|
||||
BOOST_TEST(f != l);
|
||||
}
|
||||
|
||||
// callback_parse()
|
||||
{
|
||||
auto assign_bool_parser =
|
||||
callback_identifier >> '=' >> callback_true_false >> ';';
|
||||
|
||||
std::string_view sv;
|
||||
bool b = false;
|
||||
auto success =
|
||||
bp::callback_parse(r, assign_bool_parser, callbacks{sv, b});
|
||||
BOOST_TEST(success);
|
||||
BOOST_TEST(sv == "foo");
|
||||
BOOST_TEST(b == false);
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
112
test/lexer_and_parser_symbol_table.cpp
Normal file
112
test/lexer_and_parser_symbol_table.cpp
Normal file
@@ -0,0 +1,112 @@
|
||||
// Copyright (C) 2024 T. Zachary Laine
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See
|
||||
// accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
#define BOOST_PARSER_TESTING
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#include <boost/parser/parser.hpp>
|
||||
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
|
||||
|
||||
namespace bp = boost::parser;
|
||||
|
||||
bp::rule<class symbol_rule, std::string_view> const symrule = "symbols";
|
||||
bp::symbols<std::string_view> rule_symbols;
|
||||
auto const fwd_attr = [](auto & ctx) { _val(ctx) = _attr(ctx); };
|
||||
auto symrule_def = rule_symbols[fwd_attr];
|
||||
BOOST_PARSER_DEFINE_RULES(symrule);
|
||||
|
||||
constexpr auto I = bp::token_spec<"I", 0>;
|
||||
constexpr auto V = bp::token_spec<"V", 1>;
|
||||
constexpr auto X = bp::token_spec<"X", 2>;
|
||||
constexpr auto L = bp::token_spec<"L", 3>;
|
||||
constexpr auto C = bp::token_spec<"C", 4>;
|
||||
constexpr auto arabic_num = bp::token_spec<"\\d+", 5, int>;
|
||||
|
||||
constexpr auto lexer = bp::lexer<char, int> | I | V | X | L | C | arabic_num;
|
||||
|
||||
int main()
|
||||
{
|
||||
// symbols_empty
|
||||
{
|
||||
bp::symbols<int> roman_numerals;
|
||||
bp::symbols<std::string> named_strings;
|
||||
|
||||
auto r = "I" | bp::to_tokens(lexer);
|
||||
BOOST_TEST(!bp::parse(r, roman_numerals));
|
||||
BOOST_TEST(!bp::parse(r, named_strings));
|
||||
}
|
||||
|
||||
// symbols_simple
|
||||
{
|
||||
bp::symbols<int> const roman_numerals = {
|
||||
{"I", 1}, {"V", 5}, {"X", 10}, {"L", 50}, {"C", 100}};
|
||||
bp::symbols<std::string> const named_strings = {
|
||||
{"I", "1"}, {"V", "5"}, {"X", "10"}, {"L", "50"}, {"C", "100"}};
|
||||
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("I" | bp::to_tokens(lexer), roman_numerals);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 1);
|
||||
}
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("I" | bp::to_tokens(lexer), named_strings);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "1");
|
||||
}
|
||||
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("L" | bp::to_tokens(lexer), roman_numerals);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 50);
|
||||
}
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("L" | bp::to_tokens(lexer), named_strings);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "50");
|
||||
}
|
||||
}
|
||||
|
||||
// symbols_mutating
|
||||
{
|
||||
bp::symbols<int> roman_numerals;
|
||||
roman_numerals.insert_for_next_parse("I", 1);
|
||||
roman_numerals.insert_for_next_parse("V", 5);
|
||||
roman_numerals.insert_for_next_parse("X", 10);
|
||||
auto const add_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
const std::string_view sv = bp::get(_attr(context), 0_c);
|
||||
roman_numerals.insert(context, sv, bp::get(_attr(context), 1_c));
|
||||
};
|
||||
auto const numerals_parser =
|
||||
((I | V | X | L | C) >> arabic_num)[add_numeral] >> roman_numerals;
|
||||
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("L50L" | bp::to_tokens(lexer), numerals_parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 50);
|
||||
BOOST_TEST(!bp::parse("L", roman_numerals));
|
||||
}
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("C100C" | bp::to_tokens(lexer), numerals_parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 100);
|
||||
BOOST_TEST(!bp::parse("C", roman_numerals));
|
||||
}
|
||||
{
|
||||
auto const result =
|
||||
bp::parse("L50C" | bp::to_tokens(lexer), numerals_parser);
|
||||
BOOST_TEST(!result);
|
||||
}
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
2228
test/lexer_and_parser_terminals.cpp
Normal file
2228
test/lexer_and_parser_terminals.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -228,7 +228,9 @@ constexpr auto double_s = u8"sS"; // U+0073 U+0073
|
||||
// with mutation
|
||||
{
|
||||
symbols<int> roman_numerals;
|
||||
roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);
|
||||
roman_numerals.insert_for_next_parse("I", 1);
|
||||
roman_numerals.insert_for_next_parse("V", 5);
|
||||
roman_numerals.insert_for_next_parse("X", 10);
|
||||
auto const add_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
char chars[2] = {get(_attr(context), 0_c), 0};
|
||||
|
||||
@@ -10,6 +10,12 @@
|
||||
|
||||
using namespace boost::parser;
|
||||
|
||||
rule<class symbol_rule, std::string_view> const symrule = "symbols";
|
||||
symbols<std::string_view> rule_symbols;
|
||||
auto const fwd_attr = [](auto & ctx) { _val(ctx) = _attr(ctx); };
|
||||
auto symrule_def = rule_symbols[fwd_attr];
|
||||
BOOST_PARSER_DEFINE_RULES(symrule);
|
||||
|
||||
int main()
|
||||
{
|
||||
// symbols_empty
|
||||
@@ -137,7 +143,9 @@ int main()
|
||||
// symbols_mutating
|
||||
{
|
||||
symbols<int> roman_numerals;
|
||||
roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);
|
||||
roman_numerals.insert_for_next_parse("I", 1);
|
||||
roman_numerals.insert_for_next_parse("V", 5);
|
||||
roman_numerals.insert_for_next_parse("X", 10);
|
||||
auto const add_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
char chars[2] = {get(_attr(context), 0_c), 0};
|
||||
@@ -166,7 +174,9 @@ int main()
|
||||
// insert/erase/clear
|
||||
{
|
||||
symbols<int> roman_numerals;
|
||||
roman_numerals.insert_for_next_parse("I", 1)("V", 5)("X", 10);
|
||||
roman_numerals.insert_for_next_parse("I", 1);
|
||||
roman_numerals.insert_for_next_parse("V", 5);
|
||||
roman_numerals.insert_for_next_parse("X", 10);
|
||||
|
||||
auto const insert_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
@@ -189,15 +199,16 @@ int main()
|
||||
auto const next_insert_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
char chars[2] = {get(_attr(context), 0_c), 0};
|
||||
roman_numerals.insert_for_next_parse(chars, get(_attr(context), 1_c));
|
||||
roman_numerals.insert_for_next_parse(
|
||||
context, chars, get(_attr(context), 1_c));
|
||||
};
|
||||
auto const next_erase_numeral = [&roman_numerals](auto & context) {
|
||||
using namespace boost::parser::literals;
|
||||
char chars[2] = {_attr(context), 0};
|
||||
roman_numerals.erase_for_next_parse(chars);
|
||||
roman_numerals.erase_for_next_parse(context, chars);
|
||||
};
|
||||
auto const next_clear_numerals = [&roman_numerals](auto &) {
|
||||
roman_numerals.clear_for_next_parse();
|
||||
auto const next_clear_numerals = [&roman_numerals](auto & context) {
|
||||
roman_numerals.clear_for_next_parse(context);
|
||||
};
|
||||
|
||||
auto const next_add_parser =
|
||||
@@ -217,7 +228,7 @@ int main()
|
||||
{
|
||||
// add only for the next parse
|
||||
auto result = parse("next-addL50L", next_add_parser >> roman_numerals);
|
||||
BOOST_TEST(!result); // TODO
|
||||
BOOST_TEST(!result);
|
||||
|
||||
result = parse("L", roman_numerals);
|
||||
BOOST_TEST(result);
|
||||
@@ -273,8 +284,8 @@ int main()
|
||||
BOOST_TEST(*parse("V", roman_numerals) == 5);
|
||||
|
||||
auto result = parse("next-delVV", next_delete_parser >> roman_numerals);
|
||||
BOOST_TEST(result); // TODO
|
||||
BOOST_TEST(*result == 5); // TODO
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 5);
|
||||
|
||||
result = parse("V", roman_numerals);
|
||||
BOOST_TEST(!result);
|
||||
@@ -326,12 +337,68 @@ int main()
|
||||
BOOST_TEST(*parse("L", roman_numerals) == 50);
|
||||
|
||||
auto result = parse("next-clearI", next_clear_parser >> roman_numerals);
|
||||
BOOST_TEST(result); // TODO
|
||||
BOOST_TEST(*result == 1); // TODO
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == 1);
|
||||
|
||||
BOOST_TEST(!parse("I", roman_numerals));
|
||||
BOOST_TEST(!parse("L", roman_numerals));
|
||||
}
|
||||
|
||||
{
|
||||
// parse using symbols directly -- not using the table within a rule
|
||||
rule_symbols.clear_for_next_parse();
|
||||
rule_symbols.insert_for_next_parse("I", "one");
|
||||
rule_symbols.insert_for_next_parse("L", "50");
|
||||
|
||||
auto result = parse("I", rule_symbols);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "one");
|
||||
|
||||
result = parse("L", rule_symbols);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "50");
|
||||
|
||||
BOOST_TEST(!parse("X", rule_symbols));
|
||||
}
|
||||
|
||||
{
|
||||
// symbols within a rule
|
||||
rule_symbols.clear_for_next_parse();
|
||||
rule_symbols.insert_for_next_parse("foo", "foofie");
|
||||
rule_symbols.insert_for_next_parse("bar", "barrie");
|
||||
|
||||
auto result = parse("foo", symrule);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "foofie");
|
||||
|
||||
result = parse("bar", symrule);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "barrie");
|
||||
|
||||
BOOST_TEST(!parse("X", symrule));
|
||||
BOOST_TEST(!parse("I", symrule));
|
||||
}
|
||||
|
||||
{
|
||||
// symbols within a rule w/error handler
|
||||
rule_symbols.clear_for_next_parse();
|
||||
rule_symbols.insert_for_next_parse("foo", "foofie");
|
||||
rule_symbols.insert_for_next_parse("bar", "barrie");
|
||||
|
||||
callback_error_handler error_handler(
|
||||
[](std::string_view m) { std::cout << m << "\n"; });
|
||||
auto parser = with_error_handler(symrule, error_handler);
|
||||
|
||||
auto result = parse("foo", parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "foofie");
|
||||
|
||||
result = parse("bar", parser);
|
||||
BOOST_TEST(result);
|
||||
BOOST_TEST(*result == "barrie");
|
||||
|
||||
BOOST_TEST(!parse("baz", parser));
|
||||
}
|
||||
}
|
||||
|
||||
return boost::report_errors();
|
||||
|
||||
@@ -6,6 +6,10 @@
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
|
||||
#include <boost/parser/config.hpp>
|
||||
#if BOOST_PARSER_USE_CONCEPTS
|
||||
#include <boost/parser/lexer.hpp>
|
||||
#endif
|
||||
#include <boost/parser/parser.hpp>
|
||||
|
||||
|
||||
@@ -26,6 +30,17 @@ struct globals_t
|
||||
|
||||
globals_t const globals;
|
||||
|
||||
enum class unprintable_tokens { foo, bar };
|
||||
enum class printable_tokens { foo, bar };
|
||||
std::ostream & operator<<(std::ostream & os, printable_tokens tok)
|
||||
{
|
||||
switch (tok) {
|
||||
case printable_tokens::foo: os << "foo"; break;
|
||||
case printable_tokens::bar: os << "bar"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
auto i = [](auto & ctx) { return _globals(ctx).i; };
|
||||
auto i2 = [](auto & ctx) { return _globals(ctx).i2; };
|
||||
auto u = [](auto & ctx) { return _globals(ctx).u; };
|
||||
@@ -484,4 +499,64 @@ int main()
|
||||
|
||||
PARSE_CHAR32(float_);
|
||||
PARSE_CHAR32(double_);
|
||||
|
||||
#if BOOST_PARSER_USE_CONCEPTS
|
||||
{
|
||||
std::cout << "\n\n"
|
||||
<< "----------------------------------------\n"
|
||||
<< "| unprintable_foo (token_spec) |\n"
|
||||
<< "----------------------------------------\n";
|
||||
|
||||
constexpr auto unprintable_foo =
|
||||
token_spec<"\\w\\w\\w", unprintable_tokens::foo>;
|
||||
constexpr auto unprintable_lexer =
|
||||
lexer<char, unprintable_tokens> | unprintable_foo;
|
||||
|
||||
std::cout << "token_spec<\"\\w\\w\\w\", unprintable_tokens::foo>:\n";
|
||||
parse(str | to_tokens(unprintable_lexer), unprintable_foo, trace::on);
|
||||
|
||||
std::cout
|
||||
<< "token_spec<\"\\w\\w\\w\", unprintable_tokens::foo>(\"foo\"):\n";
|
||||
parse(
|
||||
str | to_tokens(unprintable_lexer),
|
||||
unprintable_foo("foo"),
|
||||
trace::on);
|
||||
}
|
||||
|
||||
{
|
||||
std::cout << "\n\n"
|
||||
<< "----------------------------------------\n"
|
||||
<< "| printable_foo (token_spec) |\n"
|
||||
<< "----------------------------------------\n";
|
||||
|
||||
constexpr auto printable_foo =
|
||||
token_spec<"\\w\\w\\w", printable_tokens::foo>;
|
||||
constexpr auto printable_lexer =
|
||||
lexer<char, printable_tokens> | printable_foo;
|
||||
|
||||
std::cout << "token_spec<\"\\w\\w\\w\", printable_tokens::foo>:\n";
|
||||
parse(str | to_tokens(printable_lexer), printable_foo, trace::on);
|
||||
|
||||
std::cout
|
||||
<< "token_spec<\"\\w\\w\\w\", printable_tokens::foo>(\"bar\"):\n";
|
||||
parse(
|
||||
str | to_tokens(printable_lexer), printable_foo("bar"), trace::on);
|
||||
}
|
||||
|
||||
{
|
||||
std::cout << "\n\n"
|
||||
<< "----------------------------------------\n"
|
||||
<< "| int_foo (token_spec) |\n"
|
||||
<< "----------------------------------------\n";
|
||||
|
||||
constexpr auto int_foo = token_spec<"\\w\\w\\w", 42, int>;
|
||||
constexpr auto int_lexer = lexer<char, int> | int_foo;
|
||||
|
||||
std::cout << "token_spec<\"\\w\\w\\w\", 42, int>:\n";
|
||||
parse(str | to_tokens(int_lexer), int_foo, trace::on);
|
||||
|
||||
std::cout << "token_spec<\"\\w\\w\\w\", 42, int>(13):\n";
|
||||
parse(str | to_tokens(int_lexer), int_foo(13), trace::on);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user