diff --git a/doc/html/BOOST_PARSER_ALGO_CON_id34.html b/doc/html/BOOST_PARSER_ALGO_CON_id34.html new file mode 100644 index 00000000..dc72dfb9 --- /dev/null +++ b/doc/html/BOOST_PARSER_ALGO_CON_id34.html @@ -0,0 +1,38 @@ + +
+ +BOOST_PARSER_ALGO_CONSTEXPR
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_ALGO_CONSTEXPRBOOST_PARSER_ASSERT
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_ASSERT(condition)Asserts that the given condition is true. If the BOOST_PARSER_NO_RUNTIME_ASSERTIONS macro is defined by the user, BOOST_PARSER_ASSERT expands to a compile-time static_assert(). Otherwise, it expands to a run-time BOOST_ASSERT(). Note that defining BOOST_DISABLE_ASSERTS disables the use of C assert, even when BOOST_ASSERT is unavailable.
BOOST_PARSER_CONSTEXPR
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_CONSTEXPRBOOST_PARSER_DEFINE_RULES
+// In header: <boost/parser/parser.hpp>
+
+BOOST_PARSER_DEFINE_RULES(...)For each given token t, defines a pair of parse_rule() overloads, used internally within Boost.Parser. Each such pair implements the parsing behavior of rule t, using the parser t_def. This implementation is in the form of a pair of function templates. You should therefore write this macro only at namespace scope.
BOOST_PARSER_DIAGNOSTIC_PUSH
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_DIAGNOSTIC_PUSHBOOST_PARSER_DIAGNOSTIC_POP
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_DIAGNOSTIC_POPBOOST_PARSER_DISABLE_CONCEPTS
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_DISABLE_CONCEPTSBOOST_PARSER_MAX_AGGREGATE_SIZE
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_MAX_AGGREGATE_SIZEBoost.Parser automatically treats aggregate structs as if they were tuples. It uses some metaprogramming to do this. The technique used has a hard limit on the number of data members a struct can have. Re-define this macro to change the hard limit. Note that large values may increase compile times.
+BOOST_PARSER_NO_RUNTIME_ASSERTIONS
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_NO_RUNTIME_ASSERTIONSBoost.Parser uses assertions (BOOST_ASSERT()) in several places to indicate that your use of the library has an error in it. All of those places could have instead been ill-formed code, caught at compile time. It is far quicker and easier to determine exactly where in your code such an error is located if this is a runtime failure; you can just look at the stack in your favorite debugger. However, if you want to make these kinds of errors always ill-formed code, define this macro.
BOOST_PARSER_SUBRANGE
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_SUBRANGEThe subrange template that is used throughout Boost.Parser. This will be boost::parser::subrange in C++17 builds, and std::ranges::subrange in all other builds.
BOOST_PARSER_TOKEN_POSITION_TYPE
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_TOKEN_POSITION_TYPEBOOST_PARSER_TRACE_OSTREAM
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_TRACE_OSTREAMBOOST_PARSER_TRACE_TO_VS_OUTPUT
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_TRACE_TO_VS_OUTPUTIf you are using Visual Studio to run your program, and don't have a terminal in which to observe the output when parsing with trace::on, define this macro and you'll see the trace output in the Visual Studio debugger's output panel. This macro has no effect when _MSC_VER is not also defined.
BOOST_PARSER_USE_CONCEPTS
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_USE_CONCEPTSBOOST_PARSER_USE_HANA_TUPLE
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_USE_HANA_TUPLEBOOST_PARSER_USE_STD_TUPLE
+// In header: <boost/parser/config.hpp>
+
+BOOST_PARSER_USE_STD_TUPLEboost::parser::Inf
+// In header: <boost/parser/parser.hpp> + +int64_t const Inf;
boost::parser::_attr
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _attr(Context const & context);
boost::parser::_begin
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _begin(Context const & context);
boost::parser::_end
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _end(Context const & context);
boost::parser::_error_handler
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> + decltype(auto) _error_handler(Context const & context);
boost::parser::_globals
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _globals(Context const & context);
boost::parser::_locals
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _locals(Context const & context);
boost::parser::_p
+// In header: <boost/parser/parser.hpp> + +unspecified _p;
boost::parser::_params
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _params(Context const & context);
boost::parser::_pass
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _pass(Context const & context);
boost::parser::_val
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _val(Context const & context);
Returns a reference to the attribute(s) (i.e. return value) of the bottommost parser; multiple attributes will be stored within a parser::tuple. You may write to this value in a semantic action to control what attribute value(s) the associated parser produces. Returns none if the bottommost parser does not produce an attribute.
boost::parser::_where
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> decltype(auto) _where(Context const & context);
boost::parser::action_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser, typename Action> +struct action_parser { +};
boost::parser::as_utf16
+// In header: <boost/parser/transcode_view.hpp> + +constexpr auto as_utf16;
boost::parser::as_utf32
+// In header: <boost/parser/transcode_view.hpp> + +constexpr auto as_utf32;
boost::parser::as_utf8
+// In header: <boost/parser/transcode_view.hpp> + +constexpr auto as_utf8;
boost::parser::attr
+// In header: <boost/parser/parser.hpp> + + +template<typename Attribute> constexpr auto attr(Attribute a);
Returns an attr_parser which matches anything, and consumes no input, and which produces a as its attribute.
boost::parser::attr_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Attribute> +struct attr_parser { +};
boost::parser::attribute
+// In header: <boost/parser/parser.hpp> + +template<typename R, typename Parser> +struct attribute { + // types + typedef unspecified initial_type; + typedef unspecified type; +};
A type trait that evaluates to the attribute type for parser Parser used to parse range R, as if by calling parse(r, parser), using some R r and Parser parser. Note that this implies that pointers to null-terminated strings are supported types for R. The result is not wrapped in a std::optional like the result of a call to parse() would be. If Parser produces no attribute, the result is the no-attribute sentinel type none.
attribute_t
+// In header: <boost/parser/parser_fwd.hpp> + + +typedef typename attribute< R, Parser >::type attribute_t;
boost::parser::bin
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned int, 2 > > bin;
boost::parser::blank
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< ws_parser< false, true > > blank;
The whitespace parser that does not match end-of-line. This matches any one of the Unicode code points with the White_Space property, as defined in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt, except for the ones matched by eol. Produces no attribute.
boost::parser::bool_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< bool_parser > bool_;
boost::parser::callback_error_handler
+// In header: <boost/parser/error_handling.hpp> + + +struct callback_error_handler { + // types + typedef std::function< void(std::string const &)> callback_type; + + // construct/copy/destruct + callback_error_handler(); + callback_error_handler(callback_type, callback_type = callback_type()); + callback_error_handler(std::string_view, callback_type, + callback_type = callback_type()); + callback_error_handler(std::wstring_view, callback_type, + callback_type = callback_type()); + + // public member functions + template<typename Iter, typename Sentinel, + template< class > class Exception> + error_handler_result + operator()(Iter, Sentinel, Exception< Iter > const &) const; + template<typename Context, typename Iter> + void diagnose(diagnostic_kind, std::string_view, Context const &, Iter) const; + template<typename Context> + void diagnose(diagnostic_kind, std::string_view, Context const &) const; + + // public data members + callback_type error_; + callback_type warning_; + std::string filename_; +};
An error handler that allows users to supply callbacks to handle the reporting of warnings and errors. The reporting of errors and/or warnings can be suppressed by supplying one or both default-constructed callbacks.
+callback_error_handler
+ public
+ construct/copy/destructcallback_error_handler();
callback_error_handler(callback_type error, + callback_type warning = callback_type());
callback_error_handler(std::string_view filename, callback_type error, + callback_type warning = callback_type());
callback_error_handler(std::wstring_view filename, callback_type error, + callback_type warning = callback_type());+
This overload is Windows-only.
+callback_error_handler public member functionstemplate<typename Iter, typename Sentinel, template< class > class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception< Iter > const & e) const;
template<typename Context, typename Iter> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context, Iter it) const;
template<typename Context> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context) const;
boost::parser::callback_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename Callbacks> + bool callback_parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + Callbacks const & callbacks, + trace trace_mode = trace::off);
Parses r using parser, and returns whether the parse was successful. The entire input range r must be consumed for the parse to be considered successful. When a callback rule r is successful during the parse, one of two things happens: 1) if r has an attribute, callbacks(tag, x) will be called (where tag is decltype(r)::tag_type{}, and x is the attribute produced by r); or 2) if r has no attribute, callbacks(tag) will be called. Callbacks is expected to be an invocable with the correct overloads required to support all successful rule parses that might occur. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::callback_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser, typename Callbacks> + bool callback_parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + Callbacks const & callbacks, + trace trace_mode = trace::off);
Parses r using parser, skipping all input recognized by skip between the application of any two parsers, and returns whether the parse was successful. The entire input range r must be consumed for the parse to be considered successful. When a callback rule r is successful during the parse, one of two things happens: 1) if r has an attribute, callbacks(tag, x) will be called (where tag is decltype(r)::tag_type{}, and x is the attribute produced by r); or 2) if r has no attribute, callbacks(tag) will be called. Callbacks is expected to be an invocable with the correct overloads required to support all successful rule parses that might occur. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::callback_prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename Callbacks> + bool callback_prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + Callbacks const & callbacks, + trace trace_mode = trace::off);
Parses [first, last) using parser, and returns whether the parse was successful. When a callback rule r is successful during the parse, one of two things happens: 1) if r has an attribute, callbacks(tag, x) will be called (where tag is decltype(r)::tag_type{}, and x is the attribute produced by r); or 2) if r has no attribute, callbacks(tag) will be called. Callbacks is expected to be an invocable with the correct overloads required to support all successful rule parses that might occur. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
boost::parser::callback_prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename SkipParser, + typename Callbacks> + bool callback_prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + Callbacks const & callbacks, + trace trace_mode = trace::off);
Parses [first, last) using parser, skipping all input recognized by skip between the application of any two parsers, and returns whether the parse was successful. When a callback rule r is successful during the parse, one of two things happens: 1) if r has an attribute, callbacks(tag, x) will be called (where tag is decltype(r)::tag_type{}, and x is the attribute produced by r); or 2) if r has no attribute, callbacks(tag) will be called. Callbacks is expected to be an invocable with the correct overloads required to support all successful rule parses that might occur. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
boost::parser::callback_rule
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename TagType, typename Attribute = no_attribute, + typename LocalState = no_local_state, + typename ParamsTuple = no_params> +struct callback_rule { +};
A type used to declare named parsing rules that support reporting of attributes via callback. The TagType template parameter is used to associate a particular rule with the rule_parser used during parsing.
boost::parser::char_
+// In header: <boost/parser/parser.hpp> + +unspecified char_;
The single-character parser. The produced attribute is the type of the matched code point (char or char32_t). Used as-is, char_ matches any code point. char_ can also be used to create code point parsers that match one or more specific code point values, by calling it with: a single value comparable to a code point; a closed range of code point values [lo, hi], or a set of code point values passed as a range. When calling with a range, only the iterators that bound the range are stored. Make sure the range you pass outlives the use of the resulting parser. Note that a string literal is a range, and that it outlives any parser it is used to construct.
boost::parser::char_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Expected, typename AttributeType = void> +struct char_parser { +};
Matches a single code point. If AttributeType is not void, AttributeType is the attribute type produced; otherwise, the attribute type is the decayed type of the matched code point. The parse fails only if the parser is constructed with a specific set of expected code point values that does not include the matched code point.
boost::parser::char_set_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Tag> +struct char_set_parser { +};
boost::parser::char_subrange_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Tag> +struct char_subrange_parser { +};
Matches a single code point that falls into one of the subranges of code points associated with tag type Tag. This is used to create sets of characters for matching Unicode character classes like hex digits or control characters. Attribute type is the attribute type of the character being matched.
boost::parser::character_id
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr int character_id;
boost::parser::control
+// In header: <boost/parser/parser.hpp> + +unspecified control;
boost::parser::cp
+// In header: <boost/parser/parser.hpp> + +unspecified cp;
The code point parser. It produces a char32_t attribute. Used as-is, cp matches any code point. cp can also be used to create code point parsers that match one or more specific code point values, by calling it with: a single value comparable to a code point; a closed range of code point values [lo, hi], or a set of code point values passed as a range. When calling with a range, only the iterators that bound the range are stored. Make sure the range you pass outlives the use of the resulting parser. Note that a string literal is a range, and that it outlives any parser it is used to construct.
boost::parser::cu
+// In header: <boost/parser/parser.hpp> + +unspecified cu;
The code unit parser. It produces a char attribute. Used as-is, cu matches any code point. cu can also be used to create code point parsers that match one or more specific code point values, by calling it with: a single value comparable to a code point; a closed range of code point values [lo, hi], or a set of code point values passed as a range. When calling with a range, only the iterators that bound the range are stored. Make sure the range you pass outlives the use of the resulting parser. Note that a string literal is a range, and that it outlives any parser it is used to construct.
boost::parser::default_error_handler
+// In header: <boost/parser/error_handling_fwd.hpp> + + +struct default_error_handler { + // construct/copy/destruct + default_error_handler() = default; + + // public member functions + template<typename Iter, typename Sentinel, + template< class > class Exception> + error_handler_result + operator()(Iter, Sentinel, Exception< Iter > const &) const; + template<typename Context, typename Iter> + void diagnose(diagnostic_kind, std::string_view, Context const &, Iter) const; + template<typename Context> + void diagnose(diagnostic_kind, std::string_view, Context const &) const; +};
The error handler used when the user does not specify a custom one. This error handler prints warnings and errors to std::cerr, and does not have an associated filename.
default_error_handler public member functionstemplate<typename Iter, typename Sentinel, template< class > class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception< Iter > const & e) const;+
Handles a lex_error or parse_error exception thrown during parsing/lexing. A formatted parse-expectation failure is printed to std::cerr. Always returns error_handler_result::fail.
template<typename Context, typename Iter> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context, Iter it) const;+
Prints message to std::cerr. The diagnostic is printed with the given kind, indicating the location as being at it. This must be called within a parser semantic action, providing the parse context.
template<typename Context> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context) const;+
Prints message to std::cerr. The diagnostic is printed with the given kind, at no particular location. This must be called within a parser semantic action, providing the parse context.
boost::parser::delimited_seq_parser
+// In header: <boost/parser/parser.hpp> + +template<typename Parser, typename DelimiterParser> +struct delimited_seq_parser : + public boost::parser::repeat_parser< Parser, DelimiterParser > +{ + // construct/copy/destruct + delimited_seq_parser(Parser, DelimiterParser); +};
Repeats the application of another parser p of type Parser, [1, Inf) times, applying a parser d of type DelimiterParser in between each pair of applications of p. The parse succeeds iff p succeeds at least once, and d succeeds each time it is applied. The attribute produced is a sequence of the type of attribute produced by Parser.
boost::parser::diagnostic_kind
+// In header: <boost/parser/error_handling_fwd.hpp> + + + +enum diagnostic_kind { error, warning };
boost::parser::digit
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< digit_parser > digit;
boost::parser::directive
+// In header: <boost/parser/parser.hpp> + +template<template< class > class Parser> +struct directive { + + // public member functions + template<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 >) const noexcept; +};
Represents an unparameterized higher-order parser (e.g. omit_parser) as a directive (e.g. omit[other_parser]).
directive public member functionstemplate<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 > rhs) const noexcept;
boost::parser::double_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< float_parser< double > > double_;
boost::parser::enable_optional
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr bool enable_optional;
boost::parser::enable_variant
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr bool enable_variant;
boost::parser::eoi
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< eoi_parser > eoi;
boost::parser::eol
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< ws_parser< true, false > > eol;
The end-of-line parser. This matches "\r\n", or any one of the line break code points from the Unicode Line Break Algorithm, described in https://unicode.org/reports/tr14. Produces no attribute.
+boost::parser::eps
+// In header: <boost/parser/parser.hpp> + +unspecified eps;
boost::parser::eps_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Predicate> +struct eps_parser { +};
boost::parser::error_handler_result
+// In header: <boost/parser/error_handling_fwd.hpp> + + + +enum error_handler_result { fail, rethrow };
boost::parser::expect_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser, bool FailOnMatch> +struct expect_parser { +};
boost::parser::find_line_end
+// In header: <boost/parser/error_handling.hpp> + + +template<typename Iter, typename Sentinel> + Iter find_line_end(Iter it, Sentinel last);
boost::parser::find_line_position
+// In header: <boost/parser/error_handling.hpp> + + +template<typename Iter> + line_position< Iter > find_line_position(Iter first, Iter it);
Returns the line_position for it, counting lines from the beginning of the input first. Requires non-token iterators.
boost::parser::float_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< float_parser< float > > float_;
boost::parser::float_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename T> +struct float_parser { +};
boost::parser::get
+// In header: <boost/parser/tuple.hpp> + + +template<typename T, typename U, U I> + constexpr decltype(auto) get(T && x, integral_constant< U, I > i);
boost::parser::hex
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned int, 16 > > hex;
boost::parser::hex_digit
+// In header: <boost/parser/parser.hpp> + +unspecified hex_digit;
boost::parser::if_
+// In header: <boost/parser/parser.hpp> + + +template<typename Predicate> constexpr auto if_(Predicate pred);
Returns an if_directive that fails if the given predicate pred is false, and otherwise, applies another parser. For instance, in if_(pred)[p], p is only applied if pred is true.
boost::parser::if_directive
+// In header: <boost/parser/parser.hpp> + +template<typename Predicate> +struct if_directive { + + // public member functions + template<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 >) const noexcept; + + // public data members + Predicate pred_; +};
Represents a sequence parser, the first parser of which is an eps_parser with a predicate, as a directive (e.g. if_(pred)[p]).
if_directive public member functionstemplate<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 > rhs) const noexcept;
boost::parser::int_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< int_parser< int > > int_;
boost::parser::int_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename T, int Radix = 10, int MinDigits = 1, int MaxDigits = -1, + typename Expected = detail::nope> +struct int_parser { +};
Matches a signed number of radix Radix, of at least MinDigits and at most MaxDigits, producing an attribute of type T. Fails on any other input. The parse will also fail if Expected is anything but detail::nope (which it is by default), and the produced attribute is not equal to expected_. Radix must be one of 2, 8, 10, or 16.
integral_constant
+// In header: <boost/parser/tuple.hpp> + + +typedef hana::integral_constant< T, I > integral_constant;
boost::parser::is_token_v
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr bool is_token_v;
A type trait that evaluates to true iff T is a specialization of boost::parser::token.
boost::parser::lex_error
+// In header: <boost/parser/error_handling_fwd.hpp> + +template<typename Iter> +struct lex_error : public std::runtime_error { + // construct/copy/destruct + lex_error(Iter, std::string); + + // public member functions + char const * what() const noexcept; + + // public data members + std::string message; + Iter iter; +};
boost::parser::lexeme
+// In header: <boost/parser/parser.hpp> + +constexpr directive< lexeme_parser > lexeme;
The lexeme directive, whose operator[] returns a parser_interface<lexeme_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::lexeme_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser> +struct lexeme_parser { +};
boost::parser::lexer
+// In header: <boost/parser/lexer.hpp> + +constexpr auto lexer;
boost::parser::lexer_t
+// In header: <boost/parser/lexer.hpp> + +template<typename CharType, typename ID, ctll::fixed_string WsStr = "\\s+", + ctll::fixed_string RegexStr = "", + unspecified IDs = detail::nttp_array<-1>{}, + unspecified Specs = detail::nttp_array<detail::parse_spec{}>{}> +struct lexer_t { + // types + typedef ID id_type; + typedef token< CharType > token_type; + typedef std::basic_string_view< CharType > string_view; + + // public static functions + static constexpr size_t size(); + static constexpr auto ids(); + static constexpr auto specs(); + template<parsable_range V> static constexpr auto regex_range(V &); + + // public member functions + template<ctll::fixed_string RegexStr2, auto ID2, typename ValueType, + int Base> + constexpr auto operator|(unspecified) const; + template<auto Ch, auto... Chs> constexpr auto operator|(unspecified) const; + + // public data members + static constexpr ctll::fixed_string ws_str; + static constexpr ctll::fixed_string regex_str; + static constexpr bool has_ws; +};
boost::parser::line_position
+// In header: <boost/parser/error_handling_fwd.hpp> + +template<typename Iter> +struct line_position { + + // public data members + Iter line_start; + int64_t line_number; + int64_t column_number; +};
boost::parser::lit
+// In header: <boost/parser/parser.hpp> + + +constexpr auto lit(char8_t c);
boost::parser::lit
+// In header: <boost/parser/parser.hpp> + + +constexpr auto lit(char32_t c);
boost::parser::lit
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R> constexpr auto lit(R && str);
boost::parser::lit
+// In header: <boost/parser/parser.hpp> + + +constexpr auto lit(char c);
boost::parser::literals::operator""_c
+// In header: <boost/parser/tuple.hpp> + + +template<char... chars> constexpr auto operator""_c();
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char c);
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char8_t c);
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char32_t c);
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char const * str, std::size_t);
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char8_t const * str, std::size_t);
boost::parser::literals::operator""_l
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_l(char32_t const * str, std::size_t);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char c);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char8_t c);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char32_t c);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char const * str, std::size_t);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char8_t const * str, std::size_t);
boost::parser::literals::operator""_p
+// In header: <boost/parser/parser.hpp> + + +constexpr auto operator""_p(char32_t const * str, std::size_t);
llong
+// In header: <boost/parser/tuple.hpp> + + +typedef integral_constant< long long, I > llong;
boost::parser::long_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< int_parser< long > > long_;
boost::parser::long_long
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< int_parser< long long > > long_long;
boost::parser::lower
+// In header: <boost/parser/parser.hpp> + +unspecified lower;
boost::parser::make_subrange
+// In header: <boost/parser/subrange.hpp> + + +template<std::forward_iterator I, std::sentinel_for< I > S = I> + constexpr subrange< I, S > make_subrange(I first, S last);
boost::parser::merge
+// In header: <boost/parser/parser.hpp> + +constexpr merge_directive merge;
The merge_directive, whose operator[] returns a parser_interface<P2>, from a given parser of type parser_interface<P>, where P is a seq_parser. P2 is the same as P, except that its CombiningGroups template parameter is replaced with a tag type that causes the subparsers' attributes to be merged into a single attribute.
boost::parser::merge_directive
+// In header: <boost/parser/parser.hpp> + + +struct merge_directive { + + // public member functions + template<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> + constexpr auto + operator[](parser_interface< seq_parser< ParserTuple, BacktrackingTuple, CombiningGroups > >) const noexcept; +};
A directive type that can only be used on sequence parsers, that forces the merge of all the seq_parser's subparsers' attributes into a single attribute.
+merge_directive public member functionstemplate<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> + constexpr auto + operator[](parser_interface< seq_parser< ParserTuple, BacktrackingTuple, CombiningGroups > > rhs) const noexcept;
boost::parser::no_case
+// In header: <boost/parser/parser.hpp> + +constexpr directive< no_case_parser > no_case;
The no_case directive, whose operator[] returns a parser_interface<no_case_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::no_case_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser> +struct no_case_parser { +};
boost::parser::no_ws
+// In header: <boost/parser/lexer.hpp> + +constexpr ctll::fixed_string no_ws;
boost::parser::none
+// In header: <boost/parser/parser.hpp> + + +struct none { + // construct/copy/destruct + none() = default; + template<typename T> none(T const &); + template<typename T> none & operator=(T const &); + + // public member functions + template<typename T> operator T() const; + none operator+() const; + none operator-() const; + none operator*() const; + none operator~() const; + none operator&() const; + none operator!() const; + none operator++(); + none & operator++(int); + none operator--(); + none operator--(int); + template<typename T> none operator<<(T const &) const; + template<typename T> none operator>>(T const &) const; + template<typename T> none operator*(T const &) const; + template<typename T> none operator/(T const &) const; + template<typename T> none operator%(T const &) const; + template<typename T> none operator+(T const &) const; + template<typename T> none operator-(T const &) const; + template<typename T> none operator<(T const &) const; + template<typename T> none operator>(T const &) const; + template<typename T> none operator<=(T const &) const; + template<typename T> none operator>=(T const &) const; + template<typename T> none operator==(T const &) const; + template<typename T> none operator!=(T const &) const; + template<typename T> none operator||(T const &) const; + template<typename T> none operator&&(T const &) const; + template<typename T> none operator&(T const &) const; + template<typename T> none operator|(T const &) const; + template<typename T> none operator^(T const &) const; + template<typename T> none operator,(T const &) const; + template<typename T> none operator->*(T const &) const; + template<typename T> none operator<<=(T const &); + template<typename T> none operator>>=(T const &); + template<typename T> none operator*=(T const &); + template<typename T> none operator/=(T const &); + template<typename T> none operator%=(T const &); + template<typename T> none operator+=(T const &); + template<typename T> none 
operator-=(T const &); + template<typename T> none operator&=(T const &); + template<typename T> none operator|=(T const &); + template<typename T> none operator^=(T const &); + template<typename T> none operator[](T const &) const; + template<typename... Args> none operator()(Args const &) const; + void fail() const; +};
none
+ public
+ construct/copy/destructnone() = default;
template<typename T> none(T const &);
template<typename T> none & operator=(T const &);
none public member functionstemplate<typename T> operator T() const;
none operator+() const;
none operator-() const;
none operator*() const;
none operator~() const;
none operator&() const;
none operator!() const;
none operator++();
none & operator++(int);
none operator--();
none operator--(int);
template<typename T> none operator<<(T const &) const;
template<typename T> none operator>>(T const &) const;
template<typename T> none operator*(T const &) const;
template<typename T> none operator/(T const &) const;
template<typename T> none operator%(T const &) const;
template<typename T> none operator+(T const &) const;
template<typename T> none operator-(T const &) const;
template<typename T> none operator<(T const &) const;
template<typename T> none operator>(T const &) const;
template<typename T> none operator<=(T const &) const;
template<typename T> none operator>=(T const &) const;
template<typename T> none operator==(T const &) const;
template<typename T> none operator!=(T const &) const;
template<typename T> none operator||(T const &) const;
template<typename T> none operator&&(T const &) const;
template<typename T> none operator&(T const &) const;
template<typename T> none operator|(T const &) const;
template<typename T> none operator^(T const &) const;
template<typename T> none operator,(T const &) const;
template<typename T> none operator->*(T const &) const;
template<typename T> none operator<<=(T const &);
template<typename T> none operator>>=(T const &);
template<typename T> none operator*=(T const &);
template<typename T> none operator/=(T const &);
template<typename T> none operator%=(T const &);
template<typename T> none operator+=(T const &);
template<typename T> none operator-=(T const &);
template<typename T> none operator&=(T const &);
template<typename T> none operator|=(T const &);
template<typename T> none operator^=(T const &);
template<typename T> none operator[](T const &) const;
template<typename... Args> none operator()(Args const & ...) const;
void fail() const;
boost::parser::normalize_iterators
+// In header: <boost/parser/error_handling.hpp> + + +template<typename I, typename S> + auto normalize_iterators(I first, I curr, S last);
Returns a tuple of three iterators (corresponding to first, curr, and last) that are suitable for use in the other error handling functions, many of which require iterators into the underlying sequence being parsed. For non-token parsing cases, this is effectively a no-op; the given iterators are simply returned as-is.
boost::parser::normalize_iterators
+// In header: <boost/parser/error_handling.hpp> + + +template<typename I, typename S> + auto normalize_iterators(I first, parse_error< I > e, S last);
Returns a tuple of three iterators (corresponding to first, the iterator captured in e, and last) that are suitable for use in the other error handling functions, many of which require iterators into the underlying sequence being parsed. For non-token parsing cases, this is effectively a no-op; the given iterators are simply returned as-is.
boost::parser::normalize_iterators
+// In header: <boost/parser/error_handling.hpp> + + +template<typename I, typename S> + auto normalize_iterators(I first, lex_error< I > e, S last);
Returns a tuple of three iterators (corresponding to first, the iterator captured in e, and last) that are suitable for use in the other error handling functions, many of which require iterators into the underlying sequence being parsed. For non-token parsing cases, this is effectively a no-op; the given iterators are simply returned as-is.
null_sentinel_t
+// In header: <boost/parser/parser_fwd.hpp> + + +typedef unspecified null_sentinel_t;
boost::parser::null_term
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename CharT> constexpr auto null_term(CharT * ptr);
boost::parser::oct
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned int, 8 > > oct;
boost::parser::omit
+// In header: <boost/parser/parser.hpp> + +constexpr directive< omit_parser > omit;
The omit directive, whose operator[] returns a parser_interface<omit_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::omit_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser> +struct omit_parser { +};
boost::parser::one_plus_parser
+// In header: <boost/parser/parser.hpp> + +template<typename Parser> +struct one_plus_parser : public boost::parser::repeat_parser< Parser > { + // construct/copy/destruct + one_plus_parser(Parser); +};
boost::parser::operator%
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator%(char c, parser_interface< Parser > rhs);
boost::parser::operator%
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator%(char32_t c, parser_interface< Parser > rhs);
boost::parser::operator%
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R, typename Parser> + constexpr auto operator%(R && r, parser_interface< Parser > rhs);
boost::parser::operator-
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator-(char c, parser_interface< Parser > rhs);
boost::parser::operator-
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator-(char32_t c, parser_interface< Parser > rhs);
boost::parser::operator-
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R, typename Parser> + constexpr auto operator-(R && r, parser_interface< Parser > rhs);
boost::parser::operator>>
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator>>(char c, parser_interface< Parser > rhs);
boost::parser::operator>>
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator>>(char32_t c, parser_interface< Parser > rhs);
boost::parser::operator>>
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R, typename Parser> + constexpr auto operator>>(R && r, parser_interface< Parser > rhs);
boost::parser::operator>
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator>(char c, parser_interface< Parser > rhs);
boost::parser::operator>
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator>(char32_t c, parser_interface< Parser > rhs);
boost::parser::operator>
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R, typename Parser> + constexpr auto operator>(R && r, parser_interface< Parser > rhs);
boost::parser::operator|
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator|(char c, parser_interface< Parser > rhs);
boost::parser::operator|
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser> + constexpr auto operator|(char32_t c, parser_interface< Parser > rhs);
boost::parser::operator|
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R, typename Parser> + constexpr auto operator|(R && r, parser_interface< Parser > rhs);
boost::parser::opt_parser
+// In header: <boost/parser/parser.hpp> + +template<typename Parser> +struct opt_parser { + + // public member functions + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &) const; + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &, Attribute &) const; + + // public data members + Parser parser_; +};
Repeats the application of another parser of type Parser, [0, 1] times. The parse always succeeds. The attribute produced is a std::optional<T>, where T is the type of attribute produced by Parser.
opt_parser public member functionstemplate<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter & first, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success) const;
template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter & first, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success, + Attribute & retval) const;
boost::parser::or_parser
+// In header: <boost/parser/parser.hpp> + +template<typename ParserTuple> +struct or_parser { + // construct/copy/destruct + or_parser(ParserTuple); + + // public member functions + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &) const; + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &, Attribute &) const; + + // public data members + ParserTuple parsers_; +};
Applies each parser in ParserTuple, in order, stopping after the application of the first one that succeeds. The parse succeeds iff one of the sub-parsers succeeds. The attribute produced is a std::variant over the types of attribute produced by the parsers in ParserTuple.
or_parser public member functionstemplate<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter & first, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success) const;
template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter & first, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success, + Attribute & retval) const;
boost::parser::parse_error
+// In header: <boost/parser/error_handling_fwd.hpp> + +template<typename Iter> +struct parse_error : public std::runtime_error { + // construct/copy/destruct + parse_error(Iter, std::string); + + // public member functions + char const * what() const noexcept; + + // public data members + std::string message; + Iter iter; +};
boost::parser::parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename Attr> + bool parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + Attr & attr, trace trace_mode = trace::off);
Parses r using parser, and returns whether the parse was successful. The entire input range r must be consumed for the parse to be considered successful. On success, attr will be assigned the value of the attribute produced by parser. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler> + auto parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
Parses r using parser. Returns a std::optional containing the attribute produced by parser on parse success, and std::nullopt on parse failure. The entire input range r must be consumed for the parse to be considered successful. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser, typename Attr> + bool parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, Attr & attr, + trace trace_mode = trace::off);
Parses r using parser, skipping all input recognized by skip between the application of any two parsers, and returns whether the parse was successful. The entire input range r must be consumed for the parse to be considered successful. On success, attr will be assigned the value of the attribute produced by parser. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + auto parse(R const & r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
Parses r using parser, skipping all input recognized by skip between the application of any two parsers. Returns a std::optional containing the attribute produced by parser on parse success, and std::nullopt on parse failure. The entire input range r must be consumed for the parse to be considered successful. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::parser_interface
+// In header: <boost/parser/parser.hpp> + +template<typename Parser, typename GlobalState, typename ErrorHandler> +struct parser_interface { + // types + typedef Parser parser_type; + typedef GlobalState global_state_type; + typedef ErrorHandler error_handler_type; + typedef int parser_interface_derivation_tag; + + // construct/copy/destruct + parser_interface(); + parser_interface(parser_type); + parser_interface(parser_type, global_state_type, error_handler_type); + + // public member functions + constexpr auto operator!() const noexcept; + constexpr auto operator&() const noexcept; + constexpr auto operator*() const noexcept; + constexpr auto operator+() const noexcept; + constexpr auto operator-() const noexcept; + template<typename ParserType2> + constexpr auto operator>>(parser_interface< ParserType2 >) const noexcept; + constexpr auto operator>>(char) const noexcept; + constexpr auto operator>>(char32_t) const noexcept; + template<parsable_range_like R> + constexpr auto operator>>(R &&) const noexcept; + template<typename ParserType2> + constexpr auto operator>(parser_interface< ParserType2 >) const noexcept; + constexpr auto operator>(char) const noexcept; + constexpr auto operator>(char32_t) const noexcept; + template<parsable_range_like R> + constexpr auto operator>(R &&) const noexcept; + template<typename ParserType2> + constexpr auto operator|(parser_interface< ParserType2 >) const noexcept; + template<typename ParserType2> + constexpr auto operator||(parser_interface< ParserType2 >) const noexcept; + constexpr auto operator|(char) const noexcept; + constexpr auto operator|(char32_t) const noexcept; + template<parsable_range_like R> + constexpr auto operator|(R &&) const noexcept; + template<typename ParserType2> + constexpr auto operator-(parser_interface< ParserType2 >) const noexcept; + constexpr auto operator-(char) const noexcept; + constexpr auto operator-(char32_t) const noexcept; + template<parsable_range_like R> + constexpr auto operator-(R 
&&) const noexcept; + template<typename ParserType2> + constexpr auto operator%(parser_interface< ParserType2 >) const noexcept; + constexpr auto operator%(char) const noexcept; + constexpr auto operator%(char32_t) const noexcept; + template<parsable_range_like R> + constexpr auto operator%(R &&) const noexcept; + template<typename Action> constexpr auto operator[](Action) const; + template<typename Arg, typename... Args> + constexpr auto operator()(Arg &&, Args &&...) const noexcept; +};
A wrapper for parsers that provides the operations that must be supported by all parsers (e.g. operator>>()). GlobalState is an optional state object that can be accessed within semantic actions via a call to _globals(). This global state object is ignored for all but the topmost parser; the topmost global state object is available in the semantic actions of all nested parsers. ErrorHandler is the type of the error handler to be used on parse failure. This handler is ignored on all but the topmost parser; the topmost parser's error handler is used for all errors encountered during parsing.
parser_interface public member functionsconstexpr auto operator!() const noexcept;+
Returns a parser_interface containing a parser equivalent to an expect_parser containing parser_, with FailOnMatch == true.
constexpr auto operator&() const noexcept;+
Returns a parser_interface containing a parser equivalent to an expect_parser containing parser_, with FailOnMatch == false.
constexpr auto operator*() const noexcept;+
Returns a parser_interface containing a parser equivalent to a zero_plus_parser containing parser_.
constexpr auto operator+() const noexcept;+
Returns a parser_interface containing a parser equivalent to a one_plus_parser containing parser_.
constexpr auto operator-() const noexcept;+
Returns a parser_interface containing a parser equivalent to an opt_parser containing parser_.
template<typename ParserType2> + constexpr auto + operator>>(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by rhs.parser_.
constexpr auto operator>>(char rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(rhs).
constexpr auto operator>>(char32_t rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(rhs).
template<parsable_range_like R> + constexpr auto operator>>(R && r) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(r).
template<typename ParserType2> + constexpr auto operator>(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by rhs.parser_. No back-tracking is allowed after parser_ succeeds; if rhs.parser_ fails after parser_ succeeds, the top-level parse fails.
constexpr auto operator>(char rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(rhs). No back-tracking is allowed after parser_ succeeds; if lit(rhs) fails after parser_ succeeds, the top-level parse fails.
constexpr auto operator>(char32_t rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(rhs). No back-tracking is allowed after parser_ succeeds; if lit(rhs) fails after parser_ succeeds, the top-level parse fails.
template<parsable_range_like R> + constexpr auto operator>(R && r) const noexcept;+
Returns a parser_interface containing a parser equivalent to a seq_parser containing parser_ followed by lit(r). No back-tracking is allowed after parser_ succeeds; if lit(r) fails after parser_ succeeds, the top-level parse fails.
template<typename ParserType2> + constexpr auto operator|(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to an or_parser containing parser_ followed by rhs.parser_.
template<typename ParserType2> + constexpr auto + operator||(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a perm_parser containing parser_ followed by rhs.parser_. It is an error to use eps (conditional or not) with this operator.
constexpr auto operator|(char rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to an or_parser containing parser_ followed by lit(rhs).
constexpr auto operator|(char32_t rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to an or_parser containing parser_ followed by lit(rhs).
template<parsable_range_like R> + constexpr auto operator|(R && r) const noexcept;+
Returns a parser_interface containing a parser equivalent to an or_parser containing parser_ followed by lit(r).
template<typename ParserType2> + constexpr auto operator-(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to !rhs >> *this.
constexpr auto operator-(char rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to !lit(rhs) >> *this.
constexpr auto operator-(char32_t rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to !lit(rhs) >> *this.
template<parsable_range_like R> + constexpr auto operator-(R && r) const noexcept;+
Returns a parser_interface containing a parser equivalent to !lit(r) >> *this.
template<typename ParserType2> + constexpr auto operator%(parser_interface< ParserType2 > rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a delimited_seq_parser containing parser_ and rhs.parser_.
constexpr auto operator%(char rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a delimited_seq_parser containing parser_ and lit(rhs).
constexpr auto operator%(char32_t rhs) const noexcept;+
Returns a parser_interface containing a parser equivalent to a delimited_seq_parser containing parser_ and lit(rhs).
template<parsable_range_like R> + constexpr auto operator%(R && r) const noexcept;+
Returns a parser_interface containing a parser equivalent to a delimited_seq_parser containing parser_ and lit(r).
template<typename Action> constexpr auto operator[](Action action) const;+
Returns a parser_interface containing a parser equivalent to an action_parser containing parser_, with semantic action action.
template<typename Arg, typename... Args> + constexpr auto operator()(Arg && arg, Args &&... args) const noexcept;+
Returns parser_((Arg &&)arg, (Args &&)args...). This is useful for those parsers that have operator() overloads, e.g. char_('x'). By convention, parsers' operator()s return parser_interfaces.
This function does not participate in overload resolution unless parser_((Arg &&)arg, (Args &&)args...) is well-formed.
boost::parser::perm_parser
+// In header: <boost/parser/parser.hpp> + +template<typename ParserTuple> +struct perm_parser { + // construct/copy/destruct + perm_parser(ParserTuple); + + // public member functions + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &) const; + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &, Attribute &) const; + template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename... Ts, int... Is> + void call_impl(Iter &, Sentinel, Context const &, SkipParser const &, + unspecified, bool &, tuple< Ts... > &, + std::integer_sequence< int, Is... >) const; + + // public data members + ParserTuple parsers_; +};
Applies each parser in ParserTuple, in any order, stopping after all of them have matched the input. The parse succeeds iff all the parsers match, regardless of the order in which they do. The attribute produced is a parser::tuple containing the attributes of the subparsers, in the order of the parsers' appearance in ParserTuple, not the order of the parsers' matches. It is an error to specialize perm_parser with a ParserTuple template parameter that includes an eps_parser.
perm_parser public member functionstemplate<typename Iter, typename Sentinel, typename Context, + typename SkipParser> + auto call(Iter & first_, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success) const;
template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename Attribute> + void call(Iter & first_, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success, + Attribute & retval) const;
template<typename Iter, typename Sentinel, typename Context, + typename SkipParser, typename... Ts, int... Is> + void call_impl(Iter & first, Sentinel last, Context const & context, + SkipParser const & skip, unspecified flags, bool & success, + tuple< Ts... > & retval, + std::integer_sequence< int, Is... >) const;
boost::parser::prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename Attr> + bool prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + Attr & attr, trace trace_mode = trace::off);
Parses [first, last) using parser, and returns whether the parse was successful. On success, attr will be assigned the value of the attribute produced by parser. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
+
+Template Parameters: |
+
|
+
boost::parser::prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
boost::parser::prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename SkipParser, + typename Attr> + bool prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, Attr & attr, + trace trace_mode = trace::off);
Parses [first, last) using parser, skipping all input recognized by skip between the application of any two parsers, and returns whether the parse was successful. On success, attr will be assigned the value of the attribute produced by parser. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
boost::parser::prefix_parse
+// In header: <boost/parser/parser.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename SkipParser> + auto prefix_parse(I & first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
Parses [first, last) using parser, skipping all input recognized by skip between the application of any two parsers. Returns a std::optional containing the attribute produced by parser on parse success, and std::nullopt on parse failure. If trace_mode == trace::on, a verbose trace of the parse will be streamed to std::cout.
boost::parser::punct
+// In header: <boost/parser/parser.hpp> + +unspecified punct;
boost::parser::quoted_string
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< quoted_string_parser<> > quoted_string;
Parses a string delimited by quotation marks. This parser can be used to create parsers that accept one or more specific quotation mark characters. By default, the quotation mark is '"'; an alternate quotation mark can be specified by calling this parser with a single character, or a range of characters. If a range is specified, the opening quote must be one of the characters specified, and the closing quote must match the opening quote. Quotation marks may appear within the string if escaped with a backslash, and a pair of backslashes is treated as a single escaped backslash; all other backslashes cause the parse to fail, unless a symbol table is in use. A symbol table can be provided as a second parameter after the single character or range described above. The symbol table is used to recognize escape sequences. Each escape sequence is a backslash followed by a value in the symbol table. When using a symbol table, any backslash that is not followed by another backslash, the opening quote character, or a symbol from the symbol table will cause the parse to fail. Skipping is disabled during parsing of the entire quoted string, including the quotation marks. There is an expectation point before the closing quotation mark. Produces a std::string attribute.
boost::parser::quoted_string_parser
+// In header: <boost/parser/parser.hpp> + +template<typename Quotes, typename Escapes> +struct quoted_string_parser { + + // public member functions + constexpr quoted_string_parser() return + parser_interface(quoted_string_parser(std::move(x))); + template<parsable_range_like R> + constexpr auto operator()(R &&) const noexcept; + template<typename T, typename U> + auto operator()(T, symbols< U > const &) const noexcept; + template<parsable_range_like R, typename T> + auto operator()(R &&, symbols< T > const &) const noexcept; + + // public data members + Quotes chs_; + Escapes escapes_; + char32_t ch_; +};
Matches a string delimited by quotation marks; produces a std::string attribute.
quoted_string_parser public member functionsconstexpr quoted_string_parser() return +parser_interface(quoted_string_parser(std::move(x)));
template<parsable_range_like R> + constexpr auto operator()(R && r) const noexcept;+
Returns a parser_interface containing a quoted_string_parser that accepts any of the values in r as its quotation marks. If the input being matched during the parse is a sequence of char32_t, the elements of r are transcoded from their presumed encoding to UTF-32 during the comparison. Otherwise, the character being matched is directly compared to the elements of r.
template<typename T, typename U> + auto operator()(T x, symbols< U > const & escapes) const noexcept;+
Returns a parser_interface containing a quoted_string_parser that uses x as its quotation marks. symbols provides a list of strings that may appear after a backslash to form an escape sequence, and what character(s) each escape sequence represents. Note that "\\" and "\ch" are always valid escape sequences.
template<parsable_range_like R, typename T> + auto operator()(R && r, symbols< T > const & escapes) const noexcept;+
Returns a parser_interface containing a quoted_string_parser that accepts any of the values in r as its quotation marks. If the input being matched during the parse is a sequence of char32_t, the elements of r are transcoded from their presumed encoding to UTF-32 during the comparison. Otherwise, the character being matched is directly compared to the elements of r. escapes provides a list of strings that may appear after a backslash to form an escape sequence, and what character(s) each escape sequence represents. Note that "\\" and "\ch" are always valid escape sequences.
boost::parser::raw
+// In header: <boost/parser/parser.hpp> + +constexpr directive< raw_parser > raw;
The raw directive, whose operator[] returns a parser_interface<raw_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::raw_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser> +struct raw_parser { +};
boost::parser::repeat_directive
+// In header: <boost/parser/parser.hpp> + +template<typename MinType, typename MaxType> +struct repeat_directive { + + // public member functions + template<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 >) const noexcept; + + // public data members + MinType min_; + MaxType max_; +};
Represents a repeat_parser as a directive (e.g. repeat[other_parser]).
repeat_directive public member functionstemplate<typename Parser2> + constexpr auto operator[](parser_interface< Parser2 > rhs) const noexcept;
boost::parser::repeat
+// In header: <boost/parser/parser.hpp> + + +template<typename T> constexpr repeat_directive< T, T > repeat(T n);
Returns a repeat_directive that repeats exactly n times, and whose operator[] returns a parser_interface<repeat_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::repeat
+// In header: <boost/parser/parser.hpp> + + +template<typename MinType, typename MaxType> + constexpr repeat_directive< MinType, MaxType > + repeat(MinType min_, MaxType max_);
Returns a repeat_directive that repeats between min_ and max_ times, inclusive, and whose operator[] returns a parser_interface<repeat_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::repeat_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser, typename DelimiterParser = detail::nope, + typename MinType = int64_t, typename MaxType = int64_t> +struct repeat_parser { +};
Repeats the application of another parser p of type Parser, optionally applying another parser d of type DelimiterParser in between each pair of applications of p. The parse succeeds if p succeeds at least the minimum number of times, and d succeeds each time it is applied. The attribute produced is a sequence of the type of attribute produced by Parser.
boost::parser::replace
+// In header: <boost/parser/replace.hpp> + +unspecified replace;
A range adaptor object ([range.adaptor.object]). Given subexpressions E, P, Q, R, and S, each of the expressions replace(E,
+P), replace(E, P, Q), replace(E, P, Q, R), and replace(E, P, Q,
+R, S) are expression-equivalent to replace_view(E, P), replace_view(E, P, Q), replace_view(E, P, Q, R), replace_view(E,
+P, Q, R, S), respectively.
boost::parser::replace_view
+// In header: <boost/parser/replace.hpp> + +template<std::ranges::viewable_range V, + std::ranges::viewable_range ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> +struct replace_view { + // member classes/structs/unions + template<bool Const> + struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified ref_t_iter; + typedef BOOST_PARSER_SUBRANGE< ref_t_iter > reference_type; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr reference_type operator*() const; + }; + template<bool Const> + struct sentinel { + }; + // construct/copy/destruct + replace_view() = default; + replace_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, ReplacementV, + trace = trace::off); + replace_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + ReplacementV, trace = trace::off); + + // public member functions + constexpr V base() const; + constexpr V base(); + constexpr V replacement() const; + constexpr V replacement(); + constexpr auto begin(); + constexpr auto end(); + constexpr auto begin() const; + constexpr auto end() const; +};
Produces a range of subranges of a given range base. Each subrange is either a subrange of base that does not match the given parser parser, or is the given replacement for a match, replacement.
In addition to the template parameter constraints, V and ReplacementV must be ranges of char, or must have the same UTF format, and V and ReplacementV must meet the same compatibility requirements as described in std::ranges::join_view.
replace_view
+ public
+ construct/copy/destructreplace_view() = default;
replace_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + ReplacementV replacement, trace trace_mode = trace::off);
replace_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + ReplacementV replacement, trace trace_mode = trace::off);
boost::parser::replace_view::iterator
+// In header: <boost/parser/replace.hpp> + + +template<bool Const> +struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified ref_t_iter; + typedef BOOST_PARSER_SUBRANGE< ref_t_iter > reference_type; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr reference_type operator*() const; +};
iterator public member functionsconstexpr iterator & operator++();
constexpr reference_type operator*() const;
boost::parser::replace_view::sentinel
+// In header: <boost/parser/replace.hpp> + + +template<bool Const> +struct sentinel { +};
boost::parser::_report_error
+// In header: <boost/parser/parser_fwd.hpp> + + +template<std::forward_iterator I, typename Context> + void _report_error(Context const & context, std::string_view message, + I location);
boost::parser::_report_error
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> + void _report_error(Context const & context, std::string_view message);
boost::parser::_report_warning
+// In header: <boost/parser/parser_fwd.hpp> + + +template<std::forward_iterator I, typename Context> + void _report_warning(Context const & context, std::string_view message, + I location);
boost::parser::_report_warning
+// In header: <boost/parser/parser_fwd.hpp> + + +template<typename Context> + void _report_warning(Context const & context, std::string_view message);
boost::parser::rethrow_error_handler
+// In header: <boost/parser/error_handling.hpp> + + +struct rethrow_error_handler { + + // public member functions + template<typename Iter, typename Sentinel, + template< class > class Exception> + error_handler_result + operator()(Iter, Sentinel, Exception< Iter > const &) const; + template<typename Context, typename Iter> + void diagnose(diagnostic_kind, std::string_view, Context const &, Iter) const; + template<typename Context> + void diagnose(diagnostic_kind, std::string_view, Context const &) const; +};
rethrow_error_handler public member functionstemplate<typename Iter, typename Sentinel, template< class > class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception< Iter > const & e) const;
template<typename Context, typename Iter> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context, Iter it) const;
template<typename Context> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context) const;
boost::parser::rule
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename TagType, typename Attribute = no_attribute, + typename LocalState = no_local_state, + typename ParamsTuple = no_params> +struct rule { +};
A type used to declare named parsing rules. The TagType template parameter is used to associate a particular rule with the rule_parser used during parsing.
boost::parser::rule_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<bool CanUseCallbacks, typename TagType, typename Attribute, + typename LocalState, typename ParamsTuple> +struct rule_parser { +};
Applies another parser p, associated with this parser via TagType. The attribute produced is Attribute. Both a default-constructed object of type LocalState, and a default-constructed object of type ParamsTuple, are added to the parse context before the associated parser is applied. The parse succeeds iff p succeeds. If CanUseCallbacks is true, and if this parser is used within a call to callback_parse(), the attribute is produced via callback; otherwise, the attribute is produced as normal (as a return value, or as an out-param). The rule may be constructed with user-friendly diagnostic text that will appear if the top-level parse is executed with trace_mode == boost::parser::trace::on.
boost::parser::search_all
+// In header: <boost/parser/search.hpp> + +unspecified search_all;
A range adaptor object ([range.adaptor.object]). Given subexpressions E and P, Q, and R, each of the expressions search_all(E, P), search_all(E, P, Q), and search_all(E, P, Q, R) are expression-equivalent to search_all_view(E, P), search_all_view(E,
+P, Q), and search_all_view(E, P, Q, R), respectively.
boost::parser::search_all_view
+// In header: <boost/parser/search.hpp> + +template<std::ranges::viewable_range V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> +struct search_all_view { + // member classes/structs/unions + template<bool Const> + struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr BOOST_PARSER_SUBRANGE< I > operator*() const; + }; + template<bool Const> + struct sentinel { + }; + // construct/copy/destruct + search_all_view() = default; + search_all_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, trace = trace::off); + search_all_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + + // public member functions + constexpr V base() const; + constexpr V base(); + constexpr auto begin(); + constexpr auto end(); + constexpr auto begin() const; + constexpr auto end() const; +};
Produces a sequence of subranges of the underlying sequence of type V. Each subrange is a nonoverlapping match of the given parser, using a skip-parser if provided.
search_all_view
+ public
+ construct/copy/destructsearch_all_view() = default;
search_all_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
search_all_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
boost::parser::search_all_view::iterator
+// In header: <boost/parser/search.hpp> + + +template<bool Const> +struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr BOOST_PARSER_SUBRANGE< I > operator*() const; +};
iterator public member functionsconstexpr iterator & operator++();
constexpr BOOST_PARSER_SUBRANGE< I > operator*() const;
boost::parser::search_all_view::sentinel
+// In header: <boost/parser/search.hpp> + + +template<bool Const> +struct sentinel { +};
boost::parser::search
+// In header: <boost/parser/search.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + auto search(R && r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
boost::parser::search
+// In header: <boost/parser/search.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename SkipParser, typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto search(I first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
boost::parser::search
+// In header: <boost/parser/search.hpp> + + +template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler> + auto search(R && r, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
boost::parser::search
+// In header: <boost/parser/search.hpp> + + +template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto search(I first, S last, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
boost::parser::separate
+// In header: <boost/parser/parser.hpp> + +constexpr separate_directive separate;
The separate_directive, whose operator[] returns a parser_interface<P2>, from a given parser of type parser_interface<P>, where P is a seq_parser. P2 is the same as P, except that its CombiningGroups template parameter is replaced with a tag type that prevents each subparser's attribute from merging with any other subparser's attribute.
boost::parser::separate_directive
+// In header: <boost/parser/parser.hpp> + + +struct separate_directive { + + // public member functions + template<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> + constexpr auto + operator[](parser_interface< seq_parser< ParserTuple, BacktrackingTuple, CombiningGroups > >) const noexcept; +};
A directive type that can only be used on sequence parsers, and that prevents each of the seq_parser's subparsers' attributes from merging with any other subparser's attribute.
+separate_directive public member functionstemplate<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> + constexpr auto + operator[](parser_interface< seq_parser< ParserTuple, BacktrackingTuple, CombiningGroups > > rhs) const noexcept;
boost::parser::seq_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> +struct seq_parser { +};
Applies each parser in ParserTuple, in order. The parse succeeds iff all of the sub-parsers succeed. The attribute produced is a std::tuple over the types of attribute produced by the parsers in ParserTuple. The BacktrackingTuple template parameter is a parser::tuple of std::bool_constant values. The ith such value indicates whether backtracking is allowed if the ith parser fails.
boost::parser::short_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< int_parser< short > > short_;
boost::parser::skip
+// In header: <boost/parser/parser.hpp> + +constexpr skip_directive skip;
The skip_directive, whose operator[] returns a parser_interface<skip_parser<P>> from a given parser of type parser_interface<P>.
boost::parser::skip_directive
+// In header: <boost/parser/parser.hpp> + +template<typename SkipParser = detail::nope> +struct skip_directive { + + // public member functions + template<typename Parser> + constexpr auto operator[](parser_interface< Parser >) const noexcept; + template<typename SkipParser2> + constexpr auto operator()(parser_interface< SkipParser2 >) const noexcept; + + // public data members + SkipParser skip_parser_; +};
Represents a skip parser as a directive. When used without a skip parser, e.g. skip[parser_in_which_to_do_skipping], the skipper for the entire parse is used. When given another parser, e.g. skip(skip_parser)[parser_in_which_to_do_skipping], that other parser is used as the skipper within the directive.
skip_directive public member functionstemplate<typename Parser> + constexpr auto operator[](parser_interface< Parser > rhs) const noexcept;
template<typename SkipParser2> + constexpr auto + operator()(parser_interface< SkipParser2 > skip_parser) const noexcept;+
Returns a skip_directive with skip_parser as its skipper.
boost::parser::skip_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser, typename SkipParser = detail::nope> +struct skip_parser { +};
boost::parser::sorted
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr sorted_t sorted;
boost::parser::sorted_t
+// In header: <boost/parser/parser_fwd.hpp> + + +struct sorted_t { +};
boost::parser::split
+// In header: <boost/parser/split.hpp> + +unspecified split;
boost::parser::split_view
+// In header: <boost/parser/split.hpp> + +template<std::ranges::viewable_range V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> +struct split_view { + // member classes/structs/unions + template<bool Const> + struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr BOOST_PARSER_SUBRANGE< I > operator*() const; + }; + template<bool Const> + struct sentinel { + }; + // construct/copy/destruct + split_view() = default; + split_view(V, parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, trace = trace::off); + split_view(V, parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + + // public member functions + constexpr V base() const; + constexpr V base(); + constexpr auto begin(); + constexpr auto end(); + constexpr auto begin() const; + constexpr auto end() const; +};
Produces a sequence of subranges of the underlying sequence of type V. The underlying sequence is split into subranges delimited by matches of the given parser, possibly using a given skip-parser.
split_view
+ public
+ construct/copy/destructsplit_view() = default;
split_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, + trace trace_mode = trace::off);
split_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + trace trace_mode = trace::off);
boost::parser::split_view::iterator
+// In header: <boost/parser/split.hpp> + + +template<bool Const> +struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr BOOST_PARSER_SUBRANGE< I > operator*() const; +};
iterator public member functionsconstexpr iterator & operator++();
constexpr BOOST_PARSER_SUBRANGE< I > operator*() const;
boost::parser::split_view::sentinel
+// In header: <boost/parser/split.hpp> + + +template<bool Const> +struct sentinel { +};
boost::parser::stream_error_handler
+// In header: <boost/parser/error_handling_fwd.hpp> + + +struct stream_error_handler { + // construct/copy/destruct + stream_error_handler(); + stream_error_handler(std::string_view); + stream_error_handler(std::string_view, std::ostream &); + stream_error_handler(std::string_view, std::ostream &, std::ostream &); + stream_error_handler(std::wstring_view); + stream_error_handler(std::wstring_view, std::ostream &); + stream_error_handler(std::wstring_view, std::ostream &, std::ostream &); + + // public member functions + template<typename Iter, typename Sentinel, + template< class > class Exception> + error_handler_result + operator()(Iter, Sentinel, Exception< Iter > const &) const; + template<typename Context, typename Iter> + void diagnose(diagnostic_kind, std::string_view, Context const &, Iter) const; + template<typename Context> + void diagnose(diagnostic_kind, std::string_view, Context const &) const; +};
Prints warnings and errors to the std::ostreams provided by the user, or std::cerr if neither stream is specified. If a filename is provided, that is used to print all diagnostics.
stream_error_handler
+ public
+ construct/copy/destructstream_error_handler();
stream_error_handler(std::string_view filename);
stream_error_handler(std::string_view filename, std::ostream & errors);
stream_error_handler(std::string_view filename, std::ostream & errors, + std::ostream & warnings);
stream_error_handler(std::wstring_view filename);+
This overload is Windows-only.
+stream_error_handler(std::wstring_view filename, std::ostream & errors);+
This overload is Windows-only.
+stream_error_handler(std::wstring_view filename, std::ostream & errors, + std::ostream & warnings);+
This overload is Windows-only.
+stream_error_handler public member functionstemplate<typename Iter, typename Sentinel, template< class > class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception< Iter > const & e) const;+
Handles a parse_error exception thrown during parsing. A formatted parse-expectation failure is printed to *err_os_ when err_os_ is non-null, or std::cerr otherwise. Always returns error_handler_result::fail.
template<typename Context, typename Iter> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context, Iter it) const;+
Let std::ostream * s = kind == diagnostic_kind::error ? err_os_ : warn_os_; prints message to *s when s is non-null, or std::cerr otherwise. The diagnostic is printed with the given kind, indicating the location as being at it. This must be called within a parser semantic action, providing the parse context.
template<typename Context> + void diagnose(diagnostic_kind kind, std::string_view message, + Context const & context) const;+
Let std::ostream * s = kind == diagnostic_kind::error ? err_os_ : warn_os_; prints message to *s when s is non-null, or std::cerr otherwise. The diagnostic is printed with the given kind, at no particular location. This must be called within a parser semantic action, providing the parse context.
boost::parser::string
+// In header: <boost/parser/parser.hpp> + + +template<parsable_range_like R> constexpr auto string(R && str);
boost::parser::string_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename StrIter, typename StrSentinel> +struct string_parser { +};
boost::parser::string_view
+// In header: <boost/parser/parser.hpp> + +constexpr directive< string_view_parser > string_view;
The string_view directive, whose operator[] returns a parser_interface<string_view_parser<P>> from a given parser of type parser_interface<P>. This is only available in C++20 and later.
boost::parser::string_view_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser> +struct string_view_parser { +};
Applies the given parser p of type Parser. Regardless of the attribute produced by Parser, this parser's attribute is equivalent to std::basic_string_view<char_type> within a semantic action on p, where char_type is the type of character in the sequence being parsed. If the parsed range is transcoded, char_type will be the type being transcoded from. If the underlying range of char_type is non-contiguous, code using string_view_parser is ill-formed. The parse succeeds iff p succeeds. This parser is only available in C++20 and later.
boost::parser::string_view_tag
+// In header: <boost/parser/parser_fwd.hpp> + + +struct string_view_tag { +};
boost::parser::subrange
+// In header: <boost/parser/subrange.hpp> + +template<std::forward_iterator I, std::sentinel_for< I > S = I> +struct subrange { + // construct/copy/destruct + subrange() = default; + subrange(I, S); + template<typename R> explicit subrange(R const &); + + // public member functions + constexpr I begin() const; + constexpr S end() const; + constexpr subrange next(std::ptrdiff_t = 1) const; + constexpr subrange prev(std::ptrdiff_t = 1) const; + constexpr subrange & advance(std::ptrdiff_t); + template<typename I2, typename S2, + typename Enable = std::enable_if_t< std::is_convertible<I, I2>::value && std::is_convertible<S, S2>::value> > + constexpr operator subrange< I2, S2 >() const; +};
A simple view type used throughout the rest of the library in C++17 builds; similar to std::ranges::subrange.
subrange public member functionsconstexpr I begin() const;
constexpr S end() const;
constexpr subrange next(std::ptrdiff_t n = 1) const;
constexpr subrange prev(std::ptrdiff_t n = 1) const;
constexpr subrange & advance(std::ptrdiff_t n);
template<typename I2, typename S2, + typename Enable = std::enable_if_t< std::is_convertible<I, I2>::value && std::is_convertible<S, S2>::value> > + constexpr operator subrange< I2, S2 >() const;
boost::parser::switch_
+// In header: <boost/parser/parser.hpp> + + +template<typename T> constexpr auto switch_(T x);
Returns a switch-like parser. The resulting parser uses the given value x to select one of the following value/parser pairs, and to apply the selected parser. x may be a value to be used directly, or a unary invocable that takes a reference to the parse context, and returns the value to use. You can add more value/parser cases to the returned parser, using its call operator, e.g. switch_(x)(y1, p1)(y2, p2). As with the x passed to this function, each yN value can be a value or a unary invocable.
boost::parser::switch_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename SwitchValue, typename OrParser = detail::nope> +struct switch_parser { +};
boost::parser::symbol_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename T> +struct symbol_parser { +};
Matches one of a set S of possible inputs, each of which is associated with an attribute value of type T, forming a symbol table. New elements and their associated attributes may be added to or removed from S dynamically, during parsing; any such changes are reverted at the end of parsing. The parse succeeds iff an element of S is matched.
See Also:
+ ++
++
+boost::parser::symbols
+// In header: <boost/parser/parser.hpp> + +template<typename T> +struct symbols : public boost::parser::parser_interface< symbol_parser< T > > { + // construct/copy/destruct + symbols(); + symbols(char const *); + symbols(std::initializer_list< std::pair< std::string_view, T > >); + symbols(char const *, + std::initializer_list< std::pair< std::string_view, T > >); + + // public member functions + void insert_for_next_parse(std::string_view, T); + void erase_for_next_parse(std::string_view); + void clear_for_next_parse(); + template<typename Context> + void insert_for_next_parse(Context const &, std::string_view, T); + template<typename Context> + void erase_for_next_parse(Context const &, std::string_view); + template<typename Context> void clear_for_next_parse(Context const &); + template<typename Context> + unspecified find(Context const &, std::string_view) const; + template<typename Context> + void insert(Context const &, std::string_view, T) const; + template<typename Context> + void erase(Context const &, std::string_view) const; + template<typename Context> void clear(Context const &) const; +};
A symbols<T> represents the initial state of a symbol table parser that produces attributes of type T. The entries in the symbol table can be changed during parsing, but those mutations do not affect the symbols<T> object itself; all mutations happen to a copy of the symbol table in the parse context. For table entries that should be used during every parse, add entries via add() or operator(). For mid-parse mutations, use insert() and erase().
symbols public member functionsvoid insert_for_next_parse(std::string_view str, T x);+
Inserts an entry consisting of a UTF-8 string str to match, and an associated attribute x, to *this. The entry is added for use in all subsequent top-level parses. Subsequent lookups during the current top-level parse will not necessarily match str.
void erase_for_next_parse(std::string_view str);+
Erases the entry whose UTF-8 match string is str, from *this. The entry will no longer be available for use in all subsequent top-level parses. str will not be removed from the symbols matched in the current top-level parse.
void clear_for_next_parse();+
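Erases all the entries from *this. The entries will no longer be available for use in all subsequent top-level parses. No entries will be removed from the symbols matched in the current top-level parse.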
template<typename Context> + void insert_for_next_parse(Context const & context, std::string_view str, + T x);+
Inserts an entry consisting of a UTF-8 string str to match, and an associated attribute x, to *this. The entry is added for use in all subsequent top-level parses. Subsequent lookups during the current top-level parse will not necessarily match str.
template<typename Context> + void erase_for_next_parse(Context const & context, std::string_view str);+
Erases the entry whose UTF-8 match string is str, from *this. The entry will no longer be available for use in all subsequent top-level parses. str will not be removed from the symbols matched in the current top-level parse.
template<typename Context> void clear_for_next_parse(Context const & context);+
Erases all the entries from the copy of the symbol table inside the parse context context.
template<typename Context> + unspecified find(Context const & context, std::string_view str) const;+
Uses UTF-8 string str to look up an attribute in the table during parsing, returning it as an optional reference. The lookup is done on the copy of the symbol table inside the parse context context, not *this.
template<typename Context> + void insert(Context const & context, std::string_view str, T x) const;+
Inserts an entry consisting of a UTF-8 string to match str, and an associated attribute x, to the copy of the symbol table inside the parse context context.
template<typename Context> + void erase(Context const & context, std::string_view str) const;+
Erases the entry whose UTF-8 match string is str from the copy of the symbol table inside the parse context context.
template<typename Context> void clear(Context const & context) const;+
Erases all the entries from the copy of the symbol table inside the parse context context.
boost::parser::to_tokens
+// In header: <boost/parser/lexer.hpp> + +unspecified to_tokens;
A range adaptor that produces boost::parser::tokens_views. Takes a range (possibly using pipe syntax) as the first argument. The second argument is the lexer to use. The third argument is a std::reference_wrapper<TokenCache>, where TokenCache is a random-access container used to cache tokens during token parsing; this argument is optional.
boost::parser::token
+// In header: <boost/parser/lexer.hpp> + +template<typename CharType> +struct token { + // types + typedef CharType char_type; + typedef std::basic_string_view< CharType > string_view; + typedef BOOST_PARSER_TOKEN_POSITION_TYPE position_type; + + // member classes/structs/unions + + union value { + + // public data members + long long ll_; + long double d_; + string_view sv_; + }; + + // construct/copy/destruct + token(); + token(int, position_type, string_view); + token(int, position_type, long long); + token(int, position_type, long double); + + // public member functions + constexpr int id() const; + constexpr position_type underlying_position() const; + constexpr bool has_string_view() const; + constexpr string_view get_string_view() const; + constexpr bool has_long_long() const; + constexpr long long get_long_long() const; + constexpr bool has_long_double() const; + constexpr long double get_long_double() const; + constexpr bool operator==(token const &) const; +};
A token produced by the lexer during token parsing.
+ +token public member functionsconstexpr int id() const;
constexpr position_type underlying_position() const;
constexpr bool has_string_view() const;
constexpr string_view get_string_view() const;
constexpr bool has_long_long() const;
constexpr long long get_long_long() const;
constexpr bool has_long_double() const;
constexpr long double get_long_double() const;
constexpr bool operator==(token const & rhs) const;
boost::parser::token::value
+// In header: <boost/parser/lexer.hpp> + + + +union value { + + // public data members + long long ll_; + long double d_; + string_view sv_; +};
boost::parser::token_chars
+// In header: <boost/parser/lexer.hpp> + +constexpr auto token_chars;
boost::parser::token_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename TokenSpec, typename Expected> +struct token_parser { +};
boost::parser::token_spec
+// In header: <boost/parser/token_parser.hpp> + +constexpr parser_interface token_spec;
A variable template that defines a token parser associated with boost::parser::token_spec_t<Regex, ID, ValueType, Base>. This token parser can be used to specify a lexer, and may also be used in parsers.
boost::parser::token_spec_t
+// In header: <boost/parser/lexer.hpp> + +template<ctll::fixed_string Regex, auto ID, typename ValueType, int Base> +struct token_spec_t { + // types + typedef decltype(ID) id_type; + typedef ValueType value_type; + + // public data members + static constexpr ctll::fixed_string regex; + static constexpr id_type id; + static constexpr int base; + static constexpr bool is_character_token; +};
boost::parser::tokens_view
+// In header: <boost/parser/lexer.hpp> + +template<std::ranges::contiguous_range V, typename Lexer, typename TokenCache> +struct tokens_view : + public std::ranges::view_interface< tokens_view< V, Lexer, TokenCache > > +{ + // types + typedef typename Lexer::token_type token_type; + + // construct/copy/destruct + tokens_view(); + explicit tokens_view(V, Lexer); + explicit tokens_view(V, Lexer, std::reference_wrapper< TokenCache >); + + // public member functions + constexpr V base() const; + constexpr V base(); + constexpr Lexer lexer(); + constexpr iterator< false > begin(); + constexpr iterator< true > begin() const; + constexpr sentinel< false > end(); + constexpr sentinel< true > end() const; +};
A std::views-compatible view that provides the tokens from the given contiguous range, using the given lexer and optional token cache. You should typically not need to use this type directly; use boost::parser::to_tokens instead.
boost::parser::trace
+// In header: <boost/parser/parser.hpp> + + + +enum trace { off, on };
boost::parser::transform
+// In header: <boost/parser/parser.hpp> + + +template<typename F> auto transform(F f);
boost::parser::transform_directive
+// In header: <boost/parser/parser.hpp> + +template<typename F> +struct transform_directive { + + // public member functions + template<typename Parser> + constexpr auto operator[](parser_interface< Parser >) const noexcept; + + // public data members + F f_; +};
A directive that transforms the attribute generated by a parser. operator[] returns a parser_interface<transform_parser<Parser, F>>.
transform_directive public member functionstemplate<typename Parser> + constexpr auto operator[](parser_interface< Parser > rhs) const noexcept;
boost::parser::transform_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename Parser, typename F> +struct transform_parser { +};
Applies the given parser p of type Parser. The attribute produced by p is passed to the given invocable f of type F. f will only be invoked if p succeeds and attributes are currently being generated. The parse succeeds iff p succeeds. The attribute produced is the result of the call to f.
boost::parser::transform_replace
+// In header: <boost/parser/transform_replace.hpp> + +unspecified transform_replace;
A range adaptor object ([range.adaptor.object]). Given subexpressions E and P, Q, R, and S, each of the expressions replace(E,
+P), replace(E, P, Q), replace(E, P, Q, R), and replace(E, P, Q,
+R, S) are expression-equivalent to replace_view(E, P), replace_view(E, P, Q), replace_view(E, P, Q, R), replace_view(E,
+P, Q, R, S), respectively.
boost::parser::transform_replace_view
+// In header: <boost/parser/transform_replace.hpp> + +template<std::ranges::viewable_range V, std::move_constructible F, + typename Parser, typename GlobalState, typename ErrorHandler, + typename SkipParser> +struct transform_replace_view { + // member classes/structs/unions + template<bool Const> + struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified ref_t_iter; + typedef BOOST_PARSER_SUBRANGE< ref_t_iter > reference_type; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr reference_type operator*() const; + }; + template<bool Const> + struct sentinel { + }; + // construct/copy/destruct + transform_replace_view() = default; + transform_replace_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, F, + trace = trace::off); + transform_replace_view(V, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + F, trace = trace::off); + + // public member functions + constexpr V base() const; + constexpr V base(); + constexpr F const & f() const; + constexpr auto begin(); + constexpr auto end(); + constexpr auto begin() const; + constexpr auto end() const; +};
Produces a range of subranges of a given range base. Each subrange is either a subrange of base that does not match the given parser parser, or is f(*boost::parser::parse(match, parser)), where f is the given invocable and match is the matching subrange.
In addition to the template parameter constraints, F must be invocable with the attribute type of Parser; V and the range type produced by F, Rf, must be ranges of char, or must have the same UTF format; and V and Rf must meet the same compatibility requirements as described in std::ranges::join_view.
transform_replace_view
+ public
+ construct/copy/destructtransform_replace_view() = default;
transform_replace_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + parser_interface< SkipParser > const & skip, F f, + trace trace_mode = trace::off);
transform_replace_view(V base, + parser_interface< Parser, GlobalState, ErrorHandler > const & parser, + F f, trace trace_mode = trace::off);
boost::parser::transform_replace_view::iterator
+// In header: <boost/parser/transform_replace.hpp> + + +template<bool Const> +struct iterator { + // types + typedef unspecified I; + typedef unspecified S; + typedef unspecified ref_t_iter; + typedef BOOST_PARSER_SUBRANGE< ref_t_iter > reference_type; + typedef unspecified base_type; + + // construct/copy/destruct + iterator() = default; + iterator(unspecified); + + // public member functions + constexpr iterator & operator++(); + constexpr reference_type operator*() const; +};
iterator public member functionsconstexpr iterator & operator++();
constexpr reference_type operator*() const;
boost::parser::transform_replace_view::sentinel
+// In header: <boost/parser/transform_replace.hpp> + + +template<bool Const> +struct sentinel { +};
tuple
+// In header: <boost/parser/tuple.hpp> + + +typedef hana::tuple< Args... > tuple;
boost::parser::uint_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned int > > uint_;
boost::parser::uint_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<typename T, int Radix = 10, int MinDigits = 1, int MaxDigits = -1, + typename Expected = detail::nope> +struct uint_parser { +};
Matches an unsigned number of radix Radix, of at least MinDigits and at most MaxDigits, producing an attribute of type T. Fails on any other input. The parse will also fail if Expected is anything but detail::nope (which it is by default), and the produced attribute is not equal to expected_. Radix must be in [2, 36].
boost::parser::ulong_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned long > > ulong_;
boost::parser::ulong_long
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned long long > > ulong_long;
boost::parser::upper
+// In header: <boost/parser/parser.hpp> + +unspecified upper;
boost::parser::ushort_
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< uint_parser< unsigned short > > ushort_;
boost::parser::utf16_view
+// In header: <boost/parser/transcode_view.hpp> + +template<unspecified V> +class utf16_view { +public: + // construct/copy/destruct + utf16_view() = default; + utf16_view(V); +};
boost::parser::utf32_view
+// In header: <boost/parser/transcode_view.hpp> + +template<unspecified V> +class utf32_view { +public: + // construct/copy/destruct + utf32_view() = default; + utf32_view(V); +};
boost::parser::utf8_view
+// In header: <boost/parser/transcode_view.hpp> + +template<unspecified V> +class utf8_view { +public: + // construct/copy/destruct + utf8_view() = default; + utf8_view(V); +};
boost::parser::vs_output_error_handler
+// In header: <boost/parser/error_handling.hpp> + + +struct vs_output_error_handler : public boost::parser::stream_error_handler { + // construct/copy/destruct + vs_output_error_handler(); + vs_output_error_handler(std::string_view); + vs_output_error_handler(std::wstring_view); +};
boost::parser::with_error_handler
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser, typename GlobalState, typename ErrorHandler> + auto with_error_handler(parser_interface< Parser, GlobalState, default_error_handler > const & parser, + ErrorHandler & error_handler);
Returns a parser_interface with the same parser and globals, with error_handler added. The result of passing any non-top-level parser for the parser argument is undefined.
boost::parser::with_globals
+// In header: <boost/parser/parser.hpp> + + +template<typename Parser, typename GlobalState, typename ErrorHandler> + auto with_globals(unspecified parser, GlobalState & globals);
Returns a parser_interface with the same parser and error handler, with globals added. The result of passing any non-top-level parser for the parser argument is undefined.
boost::parser::write_formatted_expectation_failure_error_message
+// In header: <boost/parser/error_handling.hpp> + + +template<typename Iter, typename Sentinel, template< class > class Exception> + std::ostream & + write_formatted_expectation_failure_error_message(std::ostream & os, + std::string_view filename, + Iter first, + Sentinel last, + Exception< Iter > const & e, + int64_t preferred_max_line_length = 80, + int64_t max_after_caret = 40);
boost::parser::write_formatted_expectation_failure_error_message
+// In header: <boost/parser/error_handling_fwd.hpp> + + +template<typename Iter, typename Sentinel, template< class > class Exception> + std::ostream & + write_formatted_expectation_failure_error_message(std::ostream & os, + std::wstring_view filename, + Iter first, + Sentinel last, + Exception< Iter > const & e, + int64_t preferred_max_line_length = 80, + int64_t max_after_caret = 40);
boost::parser::write_formatted_message
+// In header: <boost/parser/error_handling.hpp> + + +template<typename Iter, typename Sentinel> + std::ostream & + write_formatted_message(std::ostream & os, std::string_view filename, + Iter first, Iter it, Sentinel last, + std::string_view message, + int64_t preferred_max_line_length = 80, + int64_t max_after_caret = 40);
boost::parser::write_formatted_message
+// In header: <boost/parser/error_handling_fwd.hpp> + + +template<typename Iter, typename Sentinel> + std::ostream & + write_formatted_message(std::ostream & os, std::wstring_view filename, + Iter first, Iter it, Sentinel last, + std::string_view message, + int64_t preferred_max_line_length = 80, + int64_t max_after_caret = 40);
boost::parser::ws
+// In header: <boost/parser/parser.hpp> + +constexpr parser_interface< ws_parser< false, false > > ws;
The whitespace parser. This matches "\r\n", or any one of the Unicode code points with the White_Space property, as defined in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt. Produces no attribute.
+boost::parser::ws_id
+// In header: <boost/parser/parser_fwd.hpp> + +constexpr int ws_id;
boost::parser::ws_parser
+// In header: <boost/parser/parser_fwd.hpp> + +template<bool NewlinesOnly, bool NoNewlines> +struct ws_parser { +};
boost::parser::zero_plus_parser
+// In header: <boost/parser/parser.hpp> + +template<typename Parser> +struct zero_plus_parser : public boost::parser::repeat_parser< Parser > { + // construct/copy/destruct + zero_plus_parser(Parser); +};
+ Here are all the tables containing the various Boost.Parser parsers, examples, + etc., all in one place. These are repeated elsewhere in different sections + of the tutorial. +
+
+ This table lists all the Boost.Parser parsers. For the callable parsers, a
+ separate entry exists for each possible arity of arguments. For a parser p, if there is no entry for p without arguments, p
+ is a function, and cannot itself be used as a parser; it must be called. In
+ the table below:
+
char");
+ RESOLVE()
+ is a notional macro that expands to the resolution of a parse argument or
+ evaluation of a parse predicate (see The
+ Parsers And Their Uses);
+ "RESOLVE(pred) == true"
+ is a shorthand notation for "RESOLVE(pred) is contextually convertible to bool and true";
+ likewise for false;
+ c is a character of type
+ char, char8_t,
+ or char32_t;
+ str is a string literal
+ of type char const[], char8_t
+ const [],
+ or char32_t const
+ [];
+ pred is a parse predicate;
+ arg0, arg1,
+ arg2, ... are parse arguments;
+ a is a semantic action;
+ r is an object whose type
+ models parsable_range;
+ p, p1,
+ p2, ... are parsers; and
+ escapes is a symbols<T> object, where T is char
+ or char32_t.
+ ![]() |
+Note | +
|---|---|
|
+
+ The definition of + +template<typename T> +concept parsable_range = (std::ranges::forward_range<T> && + code_unit<std::ranges::range_value_t<T>>) || + detail::is_tokens_view_v<T>; ++ + + |
![]() |
+Note | +
|---|---|
+ Some of the parsers in this table consume no input. All parsers consume the + input they match unless otherwise stated in the table below. + |
Table 1.1. Parsers and Their Semantics
+|
+ + Parser + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+ + Matches epsilon, the empty string. Always matches, + and consumes no input. + + |
+
+ + None. + + |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Fails to match the input if |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + Matches a single whitespace code point (see note), according to the + Unicode White_Space property. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ properties. |
+
|
+
+ |
+
+ + Matches a single newline (see note), following the "hard" + line breaks in the Unicode line breaking algorithm. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ Line Breaking Algorithm. |
+
|
+
+ |
+
+ + Matches only at the end of input, and consumes no input. + + |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Always matches, and consumes no input. Generates the attribute |
+
+
+ |
+
+
+ An important use case for |
+
|
+
+ |
+
+ + Matches any single code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
++ + | +
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single control-character code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single decimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single punctuation code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single hexadecimal digit code point. + + 
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single lower-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single upper-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a binary unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the binary unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an octal unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the octal unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a hexadecimal unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the hexadecimal unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches a floating-point number. |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches a floating-point number. |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ |
+
+
+ |
+
+
+ Unlike the other entries in this table, |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the quotes. A quote within the string
+ can be written by escaping it with a backslash. A backslash within
+ the string can be written by writing two consecutive backslashes.
+ Any other use of a backslash will fail the parse. Skipping is disabled
+ while parsing the entire string, as if using |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches some character |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches some character |
+
+
+ |
+
+
+ The result does not include the |
+
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
+ Here are all the operators overloaded for parsers. In the tables below: +
+c is a character of type
+ char or char32_t;
+ a is a semantic action;
+ r is an object whose type
+ models parsable_range (see
+ Concepts); and
+ p, p1,
+ p2, ... are parsers.
+ ![]() |
+Note | +
|---|---|
+ Some of the expressions in this table consume no input. All parsers consume + the input they match unless otherwise stated in the table below. + |
Table 1.2. Combining Operations and Their Semantics
+|
+ + Expression + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Parses using |
+
+
+ |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Parses using |
+
+
+ |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff either |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
+ There are a couple of special rules not captured in the table above: +
+
+ First, the zero-or-more and one-or-more repetitions (operator*() and operator+(), respectively) may collapse when combined.
+ For any parser p, +(+p)
+ collapses to +p;
+ **p,
+ *+p,
+ and +*p
+ each collapse to just *p.
+
+ Second, using eps
+ in an alternative parser as any alternative except
+ the last one is a common source of errors; Boost.Parser disallows it. This
+ is true because, for any parser p,
+ eps
+ | p
+ is equivalent to eps,
+ since eps
+ always matches. This is not true for eps parameterized with a condition.
+ For any condition cond, eps(cond)
+ is allowed to appear anywhere within an alternative parser.
+
+ This table summarizes the attributes generated for all Boost.Parser parsers. + In the table below: +
+RESOLVE()
+ is a notional macro that expands to the resolution of a parse argument or
+ evaluation of a parse predicate (see The
+ Parsers And Their Uses); and
+ x and y
+ represent arbitrary objects.
+ Table 1.3. Parsers and Their Attributes
+|
+ + Parser + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+ + None. + + |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
+ char_
+ is a bit odd, since its attribute type is polymorphic. When you use char_
+ to parse text in the non-Unicode code path (i.e. a string of char), the attribute is char.
+ When you use the exact same char_ to parse in the Unicode-aware
+ code path, all matching is code point based, and so the attribute type is the
+ type used to represent code points, char32_t.
+ All parsing of UTF-8 falls under this case.
+
+ Here, we're parsing plain chars,
+ meaning that the parsing is in the non-Unicode code path, the attribute of
+ char_
+ is char:
+
auto result = parse("some text", boost::parser::char_); +static_assert(std::is_same_v<decltype(result), std::optional<char>>); ++
+ When you parse UTF-8, the matching is done on a code point basis, so the attribute
+ type is char32_t:
+
auto result = parse("some text" | boost::parser::as_utf8, boost::parser::char_); +static_assert(std::is_same_v<decltype(result), std::optional<char32_t>>); ++
+ The good news is that usually you don't parse characters individually. When
+ you parse with char_,
+ you usually parse repetitions of them, which will produce a std::string,
+ regardless of whether you're in Unicode parsing mode or not. If you do need
+ to parse individual characters, and want to lock down their attribute type,
+ you can use cp
+ and/or cu
+ to enforce a non-polymorphic attribute type.
+
+ Combining operations of course affect the generation of attributes. In the + tables below: +
+m and n
+ are parse arguments that resolve to integral values;
+ pred is a parse predicate;
+ arg0, arg1,
+ arg2, ... are parse arguments;
+ a is a semantic action;
+ and
+ p, p1,
+ p2, ... are parsers that
+ generate attributes.
+ Table 1.4. Combining Operations and Their Attributes
+|
+ + Parser + + |
+
+ + Attribute Type + + |
+
|---|---|
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
![]() |
+Important | +
|---|---|
+ In case you did not notice it above, adding a semantic action to a parser
+ erases the parser's attribute. The attribute is still available inside the
+ semantic action as |
+ In the table: a is a semantic
+ action; and p, p1, p2,
+ ... are parsers that generate attributes. Note that only >>
+ is used here; > has the exact
+ same attribute generation rules.
+
Table 1.5. Sequence and Alternative Combining Operations and Their Attributes
+|
+ + Expression + + |
+
+ + Attribute Type + + |
+
|---|---|
| + + | +
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
+
+template<typename T> +concept code_unit = + std::same_as<std::remove_cv_t<T>, char> || + std::same_as<std::remove_cv_t<T>, wchar_t> || + std::same_as<std::remove_cv_t<T>, char8_t> || + std::same_as<std::remove_cv_t<T>, char16_t>|| + std::same_as<std::remove_cv_t<T>, char32_t>; + +template<typename T> +concept token_iter = is_token_v<std::iter_value_t<T>>; + +template<typename T> +concept parsable_iter = + (std::forward_iterator<T> && code_unit<std::iter_value_t<T>>) || + token_iter<T>; + +template<typename T> +concept parsable_range = (std::ranges::forward_range<T> && + code_unit<std::ranges::range_value_t<T>>) || + detail::is_tokens_view_v<T>; + +template<typename T> +concept parsable_pointer = std::is_pointer_v<std::remove_cvref_t<T>> && + code_unit<std::remove_pointer_t<std::remove_cvref_t<T>>>; + +template<typename T> +concept parsable_range_like = parsable_range<T> || parsable_pointer<T>; + +template<typename T> +concept range_like = std::ranges::range<T> || parsable_pointer<T>; + +template< + typename I, + typename S, + typename ErrorHandler, + typename GlobalState> +using minimal_parse_context = decltype(detail::make_context<false, false>( + std::declval<I>(), + std::declval<S>(), + std::declval<bool &>(), + std::declval<int &>(), + std::declval<ErrorHandler const &>(), + std::declval<detail::nope &>(), + std::declval<detail::symbol_table_tries_t &>(), + std::declval<detail::pending_symbol_table_operations_t &>())); + +template<typename T, typename I, typename S, typename GlobalState> +concept error_handler = + requires ( + T const & t, + I first, + S last, + parse_error<I> const & e, + diagnostic_kind kind, + std::string_view message, + minimal_parse_context< + I, S, T, GlobalState> const & context) { + { t(first, last, e) } -> std::same_as<error_handler_result>; + t.diagnose(kind, message, context, first); + t.diagnose(kind, message, context); + }; + +template<typename T> +concept container = std::ranges::common_range<T> && requires(T t) { + { 
t.insert(t.begin(), *t.begin()) } + -> std::same_as<std::ranges::iterator_t<T>>; +}; ++
+
++ Boost.Parser can be used entirely on its own. If Boost is available, extra + functionality provided by Boost is also available. +
+
+ By default, Boost.Parser is usable entirely on its own. The only explicit opt-in
+ use of Boost is the use of Boost.Hana. If you turn on the use of Hana, the
+ tuple type used throughout Boost.Parser will be boost::hana::tuple instead of std::tuple.
+ To enable this, simply define BOOST_PARSER_USE_HANA_TUPLE.
+ The Boost.Hana tuple is much nicer, because it has an operator[], and a whole lot of very useful algorithms;
+ you will see this operator used throughout the tutorial and examples. I encourage
+ you to use the Hana tuple, but I realize that it is an often-unfamiliar replacement
+ for std::tuple, which is a C++ vocabulary template,
+ and so that is the default.
+
![]() |
+Important | +
|---|---|
+ Boost.Parser defines a template alias |
+ The presence of Boost headers is detected using __has_include(). When they are present, all the typical Boost
+ conventions are used; otherwise, non-Boost alternatives are used. This applies
+ to the use of BOOST_ASSERT
+ versus assert, and printing
+ typenames with Boost.TypeIndex versus with std::type_info.
+
![]() |
+Note | +
|---|---|
+ If you want to disable the use of the C macro |
![]() |
+Important | +
|---|---|
+ Boost.Parser uses inline namespaces around definitions of all functions and
+ types that use the optional Boost features; the name of the inline namespace
+ varies depending on whether the Boost implementation is used. So if Boost.TypeIndex
+ is available to one translation unit, but another TU must use |
+ Boost.Parser automatically treats aggregate structs
+ as if they were tuples in many cases. There is some metaprogramming logic that
+ makes this work, and this logic has a hard limit on the size of a struct that it can operate on. There is a configuration
+ macro BOOST_PARSER_MAX_AGGREGATE_SIZE
+ that you can adjust if the default value is too small. Note that turning this
+ value up significantly can significantly increase compile times. Also, MSVC
+ seems to have a hard time with large values; I successfully set this value
+ to 50 on MSVC, but 100 broke the MSVC build entirely.
+
+ Boost.Parser uses std::optional and std::variant
+ internally. There is no way to change this. However, when Boost.Parser generates
+ values as a result of the parse (see Attribute
+ Generation), it can place them into other implementations of optional
+ and/or variant, if you tell it to do so. You tell it which templates are usable
+ as an optional or variant by specializing the associated variable template.
+ For instance, here is how you would tell Boost.Parser that boost::optional
+ is an optional-type:
+
template<typename T> +constexpr bool boost::parser::enable_optional<boost::optional<T>> = true; ++
+ Here's how you would do the same thing for boost::variant2::variant:
+
template<typename... Ts> +constexpr bool boost::parser::enable_variant<boost::variant2::variant<Ts...>> = true; ++
+ The requirements on a template used as an optional are pretty simple, since
+ Boost.Parser does almost nothing but assign to them. For a type O to be a usable optional, you must be able
+ to assign to O, and O must have an operator* that returns the stored value, or a (possibly
+ cv-qualified) reference to the stored value.
+
+ For variants, the requirement is even simpler; the variant type only needs + to be assignable. +
+![]() |
+Note | +
|---|---|
+ The only thing affected by |
+ Boost.Parser uses std::ranges::subrange extensively. However, there is no
+ C++17 equivalent. So, there is a boost::parser::subrange
+ for C++17 builds. To switch between these transparently in the code, while
+ keeping CTAD operational, Boost.Parser defines BOOST_PARSER_SUBRANGE. This is
+ the name of the template, so if you use it in your own code you would use it
+ like BOOST_PARSER_SUBRANGE<I>
+ to instantiate it.
+
+ Boost.Parser allows you to debug your parsers by passing trace::on to
+ parse(). Sometimes, your run environment
+ does not include a terminal. If you're running Boost.Parser code in the Visual
+ Studio debugger, you can see this trace output in the Visual Studio debugger
+ output panel rather than in a terminal by defining BOOST_PARSER_TRACE_TO_VS_OUTPUT.
+
+ This is a conforming JSON parser. It passes all the required tests in the + JSON Test Suite, + and all but 5 of the optional ones. Notice that the actual parsing bits are + only about 150 lines of code. +
++
+// This header includes a type called json::value that acts as a +// Javascript-like polymorphic value type. +#include "json.hpp" + +#include <boost/parser/parser.hpp> + +#include <fstream> +#include <vector> +#include <climits> + + +namespace json { + + namespace bp = ::boost::parser; + using namespace bp::literals; + + // The JSON spec imposes a limit on how deeply JSON data structures are + // allowed to nest. This exception is thrown when that limit is exceeded + // during the parse. + template<typename Iter> + struct excessive_nesting : std::runtime_error + { + excessive_nesting(Iter it) : + runtime_error("excessive_nesting"), + iter(it) + {} + Iter iter; + }; + + + // The only globals we need to parse JSON are: "How many data structures + // deep are we?", and "What is the limit of open data structures + // allowed?". + struct global_state + { + int recursive_open_count = 0; + int max_recursive_open_count = 0; + }; + + // When matching paired UTF-16 surrogates, we need to track a bit of state + // between matching the first and second UTF-16 code units: namely, the + // value of the first code unit. + struct double_escape_locals + { + int first_surrogate = 0; + }; + + + // Here are all the rules declared. I've given them names that are + // end-user friendly, so that if there is a parse error, you get a message + // like "expected four hexadecimal digits here:", instead of "expected + // hex_4 here:". 
+ + bp::rule<class ws> const ws = "whitespace"; + + bp::rule<class string_char, uint32_t> const string_char = + "code point (code points <= U+001F must be escaped)"; + bp::rule<class four_hex_digits, uint32_t> const hex_4 = + "four hexadecimal digits"; + bp::rule<class escape_seq, uint32_t> const escape_seq = + "\\uXXXX hexadecimal escape sequence"; + bp::rule<class escape_double_seq, uint32_t, double_escape_locals> const + escape_double_seq = "\\uXXXX hexadecimal escape sequence"; + bp::rule<class single_escaped_char, uint32_t> const single_escaped_char = + "'\"', '\\', '/', 'b', 'f', 'n', 'r', or 't'"; + + bp::rule<class null, value> const null = "null"; + bp::rule<class string, std::string> const string = "string"; + bp::rule<class number, double> const number = "number"; + bp::rule<class object_element, boost::parser::tuple<std::string, value>> const + object_element = "object-element"; + bp::rule<class object_tag, value> const object_p = "object"; + bp::rule<class array_tag, value> const array_p = "array"; + + bp::rule<class value_tag, value> const value_p = "value"; + + + + // JSON limits whitespace to just these four characters. + auto const ws_def = '\x09'_l | '\x0a' | '\x0d' | '\x20'; + + // Since our json object representation, json::value, is polymorphic, and + // since its default-constructed state represents the JSON value "null", + // we need to tell a json::value that it is an object (similar to a map) + // before we start inserting values into it. That's why we need + // object_init. + auto object_init = [](auto & ctx) { + auto & globals = _globals(ctx); + if (globals.max_recursive_open_count < ++globals.recursive_open_count) + throw excessive_nesting(_where(ctx).begin()); + _val(ctx) = object(); + }; + + // We need object_insert because we can't just insert into the json::value + // itself. The json::value does not have an insert() member, because if + // it is currently holding a number, that makes no sense. 
So, for a + // json::value x, we need to call get<object>(x) to get the object + // interface. + auto object_insert = [](auto & ctx) { + value & v = _val(ctx); + get<object>(v).insert(std::make_pair( + std::move(_attr(ctx))[0_c], std::move(_attr(ctx)[1_c]))); + }; + + // These are the array analogues of the object semantic actions above. + auto array_init = [](auto & ctx) { + auto & globals = _globals(ctx); + if (globals.max_recursive_open_count < ++globals.recursive_open_count) + throw excessive_nesting(_where(ctx).begin()); + _val(ctx) = array(); + }; + auto array_append = [](auto & ctx) { + value & v = _val(ctx); + get<array>(v).push_back(std::move(_attr(ctx))); + }; + + // escape_double_seq is used to match pairs of UTF-16 surrogates that form + // a single code point. So, after matching one UTF-16 code unit c, we + // only want to keep going if c is a lead/high surrogate. + auto first_hex_escape = [](auto & ctx) { + auto & locals = _locals(ctx); + uint32_t const cu = _attr(ctx); + if (!boost::parser::detail::text::high_surrogate(cu)) + _pass(ctx) = false; // Not a high surrogate; explicitly fail the parse. + else + locals.first_surrogate = cu; // Save this initial code unit for later. + }; + // This is also used in escape_double_seq. When we get to this action, we + // know we've already matched a high surrogate, and so this one had better + // be a low surrogate, or we have a (local) parse failure. + auto second_hex_escape = [](auto & ctx) { + auto & locals = _locals(ctx); + uint32_t const cu = _attr(ctx); + if (!boost::parser::detail::text::low_surrogate(cu)) { + _pass(ctx) = false; // Not a low surrogate; explicitly fail the parse. + } else { + // Success! Write to the rule's attribute the code point that the + // first and second code points form. 
+ uint32_t const high_surrogate_min = 0xd800; + uint32_t const low_surrogate_min = 0xdc00; + uint32_t const surrogate_offset = + 0x10000 - (high_surrogate_min << 10) - low_surrogate_min; + uint32_t const first_cu = locals.first_surrogate; + _val(ctx) = (first_cu << 10) + cu + surrogate_offset; + } + }; + + // This is the verbose form of declaration for the integer and unsigned + // integer parsers int_parser and uint_parser. In this case, we don't + // want to use boost::parser::hex directly, since it has a variable number + // of digits. We want to match exactly 4 digits, and this is how we + // declare a hexadecimal parser that matches exactly 4. + bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def; + + // We use > here instead of >>, because once we see \u, we know that + // exactly four hex digits must follow -- no other production rule starts + // with \u. + auto const escape_seq_def = "\\u" > hex_4; + + // This uses the actions above and the simpler rule escape_seq to find + // matched UTF-16 surrogate pairs. + auto const escape_double_seq_def = + escape_seq[first_hex_escape] >> escape_seq[second_hex_escape]; + + // This symbol table recognizes each character that can appear right after + // an escaping backslash, and, if it finds one, produces the associated + // code point as its attribute. + bp::symbols<uint32_t> const single_escaped_char_def = { + {"\"", 0x0022u}, + {"\\", 0x005cu}, + {"/", 0x002fu}, + {"b", 0x0008u}, + {"f", 0x000cu}, + {"n", 0x000au}, + {"r", 0x000du}, + {"t", 0x0009u}}; + + // A string may be a matched UTF-16 escaped surrogate pair, a single + // escaped UTF-16 code unit treated as a whole code point, a single + // escaped character like \f, or any other code point outside the range + // [0x0000u, 0x001fu]. Note that we had to put escape_double_seq before + // escape_seq. Otherwise, escape_seq would eat all the escape sequences + // before escape_double_seq could try to match them. 
+ auto const string_char_def = escape_double_seq | escape_seq | + ('\\'_l > single_escaped_char) | + (bp::cp - bp::char_(0x0000u, 0x001fu)); + + // If we see the special token null, treat that as a default-constructed + // json::value. Note that we could have done this with a semantic action, + // but it is best to do everything you can without semantic actions; + // they're a lot of code. + auto const null_def = "null" >> bp::attr(value()); + + auto const string_def = bp::lexeme['"' >> *(string_char - '"') > '"']; + + // Since the JSON format for numbers is not exactly what + // boost::parser::double_ accepts (double_ accepts too much), we need to + // parse a JSON number as a sequence of characters, and then pass the + // result to double_ to actually get the numeric value. This action does + // that. The parser uses boost::parser::raw to produce the subrange of + // the input that covers the number as an attribute, which is used here. + auto parse_double = [](auto & ctx) { + auto const cp_range = _attr(ctx); + auto cp_first = cp_range.begin(); + auto const cp_last = cp_range.end(); + + auto const result = bp::prefix_parse(cp_first, cp_last, bp::double_); + if (result) { + _val(ctx) = *result; + } else { + // This would be more efficient if we used + // boost::container::small_vector, or std::inplace_vector from + // C++26. + std::vector<char> chars(cp_first, cp_last); + auto const chars_first = &*chars.begin(); + auto chars_last = chars_first + chars.size(); + _val(ctx) = std::strtod(chars_first, &chars_last); + } + }; + + // As indicated above, we want to match the specific formats JSON allows, + // and then re-parse the resulting matched range within the semantic + // action. 
+ auto const number_def = + bp::raw[bp::lexeme + [-bp::char_('-') >> + (bp::char_('1', '9') >> *bp::digit | bp::char_('0')) >> + -(bp::char_('.') >> +bp::digit) >> + -(bp::char_("eE") >> -bp::char_("+-") >> +bp::digit)]] + [parse_double]; + + // Note how, in the next three parsers, we turn off backtracking by using + // > instead of >>, once we know that there is no backtracking alternative + // that might match if we fail to match the next element. This produces + // much better error messages than if you always use >>. + + auto const object_element_def = string > ':' > value_p; + + auto const object_p_def = '{'_l[object_init] >> + -(object_element[object_insert] % ',') > '}'; + + auto const array_p_def = '['_l[array_init] >> + -(value_p[array_append] % ',') > ']'; + + // This is the top-level parser. + auto const value_p_def = + number | bp::bool_ | null | string | array_p | object_p; + + // Here, we define all the rules we've declared above, which also connects + // each rule to its _def-suffixed parser. + BOOST_PARSER_DEFINE_RULES( + ws, + hex_4, + escape_seq, + escape_double_seq, + single_escaped_char, + string_char, + null, + string, + number, + object_element, + object_p, + array_p, + value_p); + + // json::parse() takes a string_view as input. It takes an optional + // callback to use for error reporting, which defaults to a no-op that + // ignores all errors. It also takes an optional max recursion depth + // limit, which defaults to the one from the JSON spec, 512. + std::optional<value> parse( + std::string_view str, + diagnostic_function errors_callback = diagnostic_function(), + int max_recursion = 512) + { + // Turn the input range into a UTF-32 range, so that we can be sure + // that we fall into the Unicode-aware parsing path inside parse() + // below. 
+ auto const range = boost::parser::as_utf32(str); + using iter_t = decltype(range.begin()); + + if (max_recursion <= 0) + max_recursion = INT_MAX; + + // Initialize our globals to the current depth (0), and the max depth + // (max_recursion). + global_state globals{0, max_recursion}; + bp::callback_error_handler error_handler(errors_callback); + // Make a new parser that includes the globals and error handler. + auto const parser = bp::with_error_handler( + bp::with_globals(value_p, globals), error_handler); + + try { + // Parse. If no exception is thrown, due to: a failed expectation + // (such as foo > bar, where foo matches the input, but then bar + // cannot); or because the nesting depth is exceeded; we simply + // return the result of the parse. The result will contextually + // convert to false if the parse failed. Note that the + // failed-expectation exception is caught internally, and used to + // generate an error message. + return bp::parse(range, parser, ws); + } catch (excessive_nesting<iter_t> const & e) { + // If we catch an excessive_nesting exception, just report it + // and return an empty/failure result. + if (errors_callback) { + std::string const message = "error: Exceeded maximum number (" + + std::to_string(max_recursion) + + ") of open arrays and/or objects"; + std::stringstream ss; + bp::write_formatted_message( + ss, "", range.begin(), e.iter, range.end(), message); + errors_callback(ss.str()); + } + } + + return {}; + } + +} + +std::string file_slurp(std::ifstream & ifs) +{ + std::string retval; + while (ifs) { + char const c = ifs.get(); + retval += c; + } + if (!retval.empty() && retval.back() == -1) + retval.pop_back(); + return retval; +} + +int main(int argc, char * argv[]) +{ + if (argc < 2) { + std::cerr << "A filename to parse is required.\n"; + exit(1); + } + + std::ifstream ifs(argv[1]); + if (!ifs) { + std::cerr << "Unable to read file '" << argv[1] << "'.\n"; + exit(1); + } + + // Read in the entire file. 
+ std::string const file_contents = file_slurp(ifs); + // Parse the contents. If there is an error, just stream it to cerr. + auto json = json::parse( + file_contents, [](std::string const & msg) { std::cerr << msg; }); + if (!json) { + std::cerr << "Parse failure.\n"; + exit(1); + } + + std::cout << "Parse successful; contents:\n" << *json << "\n"; + + return 0; +} ++
+
+
+ This is just like the previous extended JSON parser example, except that
+ it drops all the code that defines a JSON value, array, object, etc. It communicates
+ events within the parse, and the value associated with each event. For instance,
+ when a string is parsed, a callback is called that indicates this, along
+ with the resulting std::string.
+
+
+#include <boost/parser/parser.hpp> +#include <boost/parser/transcode_view.hpp> + +#include <fstream> +#include <vector> +#include <climits> + + +namespace json { + + namespace bp = ::boost::parser; + using namespace bp::literals; + + template<typename Iter> + struct excessive_nesting : std::runtime_error + { + excessive_nesting(Iter it) : + runtime_error("excessive_nesting"), iter(it) + {} + Iter iter; + }; + + + struct global_state + { + int recursive_open_count = 0; + int max_recursive_open_count = 0; + }; + + struct double_escape_locals + { + int first_surrogate = 0; + }; + + + bp::rule<class ws> const ws = "whitespace"; + + bp::rule<class string_char, uint32_t> const string_char = + "code point (code points <= U+001F must be escaped)"; + bp::rule<class four_hex_digits, uint32_t> const hex_4 = + "four hexadecimal digits"; + bp::rule<class escape_seq, uint32_t> const escape_seq = + "\\uXXXX hexadecimal escape sequence"; + bp::rule<class escape_double_seq, uint32_t, double_escape_locals> const + escape_double_seq = "\\uXXXX hexadecimal escape sequence"; + bp::rule<class single_escaped_char, uint32_t> const single_escaped_char = + "'\"', '\\', '/', 'b', 'f', 'n', 'r', or 't'"; + + bp::callback_rule<class null_tag> const null = "null"; + + // Since we don't create polymorphic values in this parse, we need to be + // able to report that we parsed a bool, so we need a callback rule for + // this. + bp::callback_rule<class bool_tag, bool> const bool_p = "boolean"; + + bp::callback_rule<class string_tag, std::string> const string = "string"; + bp::callback_rule<class number_tag, double> const number = "number"; + + // object_element is broken up into the key (object_element_key) and the + // whole thing (object_element). This was done because the value after + // the ':' may have many parts. It may be an array, for example. This + // implies that we need to report that we have the string part of the + // object-element, and that the rest -- the value -- is coming. 
+ bp::callback_rule<class object_element_key_tag, std::string> const + object_element_key = "string"; + bp::rule<class object_element_tag> const object_element = "object-element"; + + // object gets broken up too, to enable the reporting of the beginning and + // end of the object when '{' or '}' is parsed, respectively. The same + // thing is done for array, below. + bp::callback_rule<class object_open_tag> const object_open = "'{'"; + bp::callback_rule<class object_close_tag> const object_close = "'}'"; + bp::rule<class object_tag> const object = "object"; + + bp::callback_rule<class array_open_tag> const array_open = "'['"; + bp::callback_rule<class array_close_tag> const array_close = "']'"; + bp::rule<class array_tag> const array = "array"; + + // value no longer produces an attribute, and it has no callback either. + // Each individual possible kind of value (string, array, etc.) gets + // reported separately. + bp::rule<class value_tag> const value = "value"; + + + // Since we use these tag types as function parameters in the callbacks, + // they need to be complete types. 
+ class null_tag {}; + class bool_tag {}; + class string_tag {}; + class number_tag {}; + class object_element_key_tag {}; + class object_open_tag {}; + class object_close_tag {}; + class array_open_tag {}; + class array_close_tag {}; + + + auto const ws_def = '\x09'_l | '\x0a' | '\x0d' | '\x20'; + + auto first_hex_escape = [](auto & ctx) { + auto & locals = _locals(ctx); + uint32_t const cu = _attr(ctx); + if (!boost::parser::detail::text::high_surrogate(cu)) + _pass(ctx) = false; + else + locals.first_surrogate = cu; + }; + auto second_hex_escape = [](auto & ctx) { + auto & locals = _locals(ctx); + uint32_t const cu = _attr(ctx); + if (!boost::parser::detail::text::low_surrogate(cu)) { + _pass(ctx) = false; + } else { + uint32_t const high_surrogate_min = 0xd800; + uint32_t const low_surrogate_min = 0xdc00; + uint32_t const surrogate_offset = + 0x10000 - (high_surrogate_min << 10) - low_surrogate_min; + uint32_t const first_cu = locals.first_surrogate; + _val(ctx) = (first_cu << 10) + cu + surrogate_offset; + } + }; + + bp::parser_interface<bp::uint_parser<uint32_t, 16, 4, 4>> const hex_4_def; + + auto const escape_seq_def = "\\u" > hex_4; + + auto const escape_double_seq_def = + escape_seq[first_hex_escape] >> escape_seq[second_hex_escape]; + + bp::symbols<uint32_t> const single_escaped_char_def = { + {"\"", 0x0022u}, + {"\\", 0x005cu}, + {"/", 0x002fu}, + {"b", 0x0008u}, + {"f", 0x000cu}, + {"n", 0x000au}, + {"r", 0x000du}, + {"t", 0x0009u}}; + + auto const string_char_def = escape_double_seq | escape_seq | + ('\\'_l > single_escaped_char) | + (bp::cp - bp::char_(0x0000u, 0x001fu)); + + auto const null_def = "null"_l; + + auto const bool_p_def = bp::bool_; + + auto const string_def = bp::lexeme['"' >> *(string_char - '"') > '"']; + + auto parse_double = [](auto & ctx) { + auto const cp_range = _attr(ctx); + auto cp_first = cp_range.begin(); + auto const cp_last = cp_range.end(); + + auto const result = bp::prefix_parse(cp_first, cp_last, bp::double_); + if 
(result) { + _val(ctx) = *result; + } else { + // This would be more efficient if we used + // boost::container::small_vector, or std::inplace_vector from + // C++26. + std::vector<char> chars(cp_first, cp_last); + auto const chars_first = &*chars.begin(); + auto chars_last = chars_first + chars.size(); + _val(ctx) = std::strtod(chars_first, &chars_last); + } + }; + + auto const number_def = + bp::raw[bp::lexeme + [-bp::char_('-') >> + (bp::char_('1', '9') >> *bp::digit | bp::char_('0')) >> + -(bp::char_('.') >> +bp::digit) >> + -(bp::char_("eE") >> -bp::char_("+-") >> +bp::digit)]] + [parse_double]; + + // The object_element_key parser is exactly the same as the string parser. + // Note that we did *not* use string here, though; we used string_def. If + // we had used string, its callback would have been called first, and + // worse still, since it moves its attribute, the callback for + // object_element_key would always report the empty string, because the + // string callback would have consumed it first. + auto const object_element_key_def = string_def; + + auto const object_element_def = object_element_key > ':' > value; + + // This is a very straightforward way to write object_def when we know we + // don't care about attribute-generating (non-callback) parsing. 
If we + // wanted to support both modes in one parser definition, we could have + // written: + // auto const object_open_def = eps; + // auto const object_close_def = eps; + // auto const object_def = '{' >> object_open >> + // -(object_element % ',') > + // '}' >> object_close; + auto const object_open_def = '{'_l; + auto const object_close_def = '}'_l; + auto const object_def = object_open >> + -(object_element % ',') > object_close; + + auto const array_open_def = '['_l; + auto const array_close_def = ']'_l; + auto const array_def = array_open >> -(value % ',') > array_close; + + auto const value_def = number | bool_p | null | string | array | object; + + BOOST_PARSER_DEFINE_RULES( + ws, + hex_4, + escape_seq, + escape_double_seq, + single_escaped_char, + string_char, + null, + bool_p, + string, + number, + object_element_key, + object_element, + object_open, + object_close, + object, + array_open, + array_close, + array, + value); + + // The parse function loses its attribute from the return type; now the + // return type is just bool. + template<typename Callbacks> + bool parse( + std::string_view str, + std::string_view filename, + Callbacks const & callbacks, + int max_recursion = 512) + { + auto const range = boost::parser::as_utf32(str); + using iter_t = decltype(range.begin()); + + if (max_recursion <= 0) + max_recursion = INT_MAX; + + global_state globals{0, max_recursion}; + // This is a different error handler from the json.cpp example, just + // to show different options. + bp::stream_error_handler error_handler(filename); + auto const parser = bp::with_error_handler( + bp::with_globals(value, globals), error_handler); + + try { + // This is identical to the parse() call in json.cpp, except that + // it is callback_parse() instead, and it takes the callbacks + // parameter. 
+ return bp::callback_parse(range, parser, ws, callbacks); + } catch (excessive_nesting<iter_t> const & e) { + std::string const message = "error: Exceeded maximum number (" + + std::to_string(max_recursion) + + ") of open arrays and/or objects"; + bp::write_formatted_message( + std::cout, + filename, + range.begin(), + e.iter, + range.end(), + message); + } + + return {}; + } + +} + +std::string file_slurp(std::ifstream & ifs) +{ + std::string retval; + while (ifs) { + char const c = ifs.get(); + retval += c; + } + if (!retval.empty() && retval.back() == -1) + retval.pop_back(); + return retval; +} + +// This is our callbacks-struct. It has a callback for each of the kinds of +// callback rules in our parser. If one were missing, you'd get a pretty +// nasty template instantiation error. Note that these are all const members; +// callback_parse() takes the callbacks object by constant reference. +struct json_callbacks +{ + void operator()(json::null_tag) const { std::cout << "JSON null value\n"; } + void operator()(json::bool_tag, bool b) const + { + indent(); + std::cout << "JSON bool " << (b ? 
"true" : "false") << "\n"; + } + void operator()(json::string_tag, std::string s) const + { + indent(); + std::cout << "JSON string \"" << s << "\"\n"; + } + void operator()(json::number_tag, double d) const + { + indent(); + std::cout << "JSON number " << d << "\n"; + } + void operator()(json::object_element_key_tag, std::string key) const + { + indent(); + std::cout << "JSON object element with key \"" << key + << "\" and value...\n"; + } + void operator()(json::object_open_tag) const + { + indent(1); + std::cout << "Beginning of JSON object.\n"; + } + void operator()(json::object_close_tag) const + { + indent(-1); + std::cout << "End of JSON object.\n"; + } + void operator()(json::array_open_tag) const + { + indent(1); + std::cout << "Beginning of JSON array.\n"; + } + void operator()(json::array_close_tag) const + { + indent(-1); + std::cout << "End of JSON array.\n"; + } + + void indent(int level_bump = 0) const + { + if (level_bump < 0) + indent_.resize(indent_.size() - 2); + std::cout << indent_; + if (0 < level_bump) + indent_ += " "; + } + mutable std::string indent_; +}; + +int main(int argc, char * argv[]) +{ + if (argc < 2) { + std::cerr << "A filename to parse is required.\n"; + exit(1); + } + + std::ifstream ifs(argv[1]); + if (!ifs) { + std::cerr << "Unable to read file '" << argv[1] << "'.\n"; + exit(1); + } + + std::string const file_contents = file_slurp(ifs); + bool success = json::parse(file_contents, argv[1], json_callbacks{}); + if (success) { + std::cout << "Parse successful!\n"; + } else { + std::cerr << "Parse failure.\n"; + exit(1); + } + + return 0; +} ++
+
++ Note that here, I was keeping things simple to stay close to the previous + parser. If you want to do callback parsing, you might want that because you're + limited in how much memory you can allocate, or because the JSON you're parsing + is really huge, and you only need to retain certain parts of it. +
+
+ If this is the case, one possible change that might be appealing would be
+ to reduce the memory allocations. The only memory allocation that the parser
+ does is the one we told it to do — it allocates std::strings.
+ If we instead used boost::container::small_vector<char, 1024>,
+ it would only ever allocate if it encountered a string larger than 1024 bytes.
+ We would also want to change the callbacks to take const &
+ parameters instead of using pass-by-value.
+
+ Boost.Parser is a parser + combinator library. That is, it consists of a set of low-level primitive + parsers, and operations that can be used to combine those parsers into more + complicated parsers. +
+
+ There are primitive parsers that parse epsilon (the empty
+ string), chars, ints, floats,
+ etc.
+
+ There are operations which combine parsers to create new parsers. For instance,
+ the Kleene star
+ operation takes an existing parser p
+ and creates a new parser that matches zero or more occurrences of whatever
+ p matches. Both callable objects
+ and operator overloads are used for the combining operations. For instance,
+ operator*()
+ is used for Kleene star,
+ and you can also write repeat(n)[p] to create
+ a parser for exactly n repetitions
+ of p.
+
+ Boost.Parser also tries to accommodate the multiple ways that people often
+ want to get a parse result out of their parsing code. Some parsing may best
+ be done by returning an object that represents the result of the parse. Other
+ parsing may best be done by filling in a preexisting data structure. Yet other
+ parsing may best be done by parsing small sections of a large document, and
+ reporting the results of subparsers as they are finished, via callbacks. Boost.Parser
+ accommodates all these ways of working, and even makes it possible to do callback-based
+ or non-callback-based parsing without rewriting any code (except by changing
+ the top-level call from parse()
+ to callback_parse()).
+
+ All of Boost.Parser's public interfaces are sentinel- and range-friendly, just
+ like the interfaces in std::ranges.
+
+ Boost.Parser is Unicode-aware through and through. When you parse ranges of
+ char, Boost.Parser does not assume
+ any particular encoding — not Unicode or any other encoding. Parsing
+ of inputs other than plain chars
+ assumes that the input is Unicode. In the Unicode-aware code paths, all parsing
+ is done by matching code points. This means that you can feed UTF-8 strings
+ into Boost.Parser, both as input and within your parser, and the right sort
+ of matching occurs. For instance, if your parser is trying to match repetitions
+ of the char '\xcc'
+ (which is a lead byte from a UTF-8 sequence, and so is malformed UTF-8 if not
+ followed by an appropriate UTF-8 code unit), it will not
+ match the start of "\xcc\x80"
+ (UTF-8 for the code point U+0300). Boost.Parser knows that the matching must
+ be whole-code-point, and so it interprets the char
+ '\xcc' as the code point U+00CC.
+
+ Error reporting is important to get right, and it is important to make errors + easy to understand, especially for end-users. Boost.Parser produces runtime + parse error messages that are very similar to the diagnostics that you get + when compiling with GCC and Clang (it even supports warnings that don't fail + the parse). The exact token associated with a diagnostic can be reported to + the user, with the containing line quoted, and with a marker pointing right + at the token. Boost.Parser takes care of this for you; your parser does not + need to include any special code to make this happen. Of course, you can also + replace the error handler entirely, if it doesn't fit your needs. +
+
+ Debugging complex parsers can be a real nightmare. Boost.Parser makes it trivial
+ to get a trace of your entire parse, with easy-to-read (and very verbose) indications
+ of where each part of the trace is within the parse, the state of values produced
+ by the parse, etc. Again, you don't need to write any code to make this happen
+ — you just pass a parameter to parse().
+
+ Dependencies are still a nightmare in C++, so Boost.Parser can be used as a + purely standalone library, independent of Boost. +
+
+ The majority use case for parsing with Boost.Parser is Unicode-aware parsing.
+ Those users should be able simply to use char_ and have it "just
+ work". In the case of Unicode, that "just working" implies that
+ every element of the input range should be a code point.
+
+ Some users will insist that their parsing needs are entirely ASCII. Yet other
+ users cannot use Unicode, because they use some encoding that is not a subset
+ of the Unicode encoding, like EBCDIC. These users can just parse
+ input sequences of char, and that will "just work" for
+ them. In their case, this means that every element of the input range that is parsed
+ should be a char.
+
+ This is exactly what char_
+ does, and why it does it.
+
+ Yes, and it's generally not a good programming practice to use a type which + is so loose (anything can be assigned to it, it's implicitly convertible to + anything, etc.). However, it is better than the alternative. Consider this + semantic action: +
+[](auto & ctx) { _attr(ctx) = 42; }
+
+
+ If attached to an int-parser, this is fine. If attached to an epsilon parser
+ (which has no attribute), this silently does nothing. However, in debug mode
+ the assignment in this semantic action will hit a BOOST_ASSERT(false),
+ and lead the user to a big inline comment about how they got there. This is
+ a far more understandable failure mode for most programmers than the arbitrarily-deep
+ template instantiation stack — and baffling type of ctx
+ — that would result if the expression _attr(ctx) were ill-formed.
+
+ The use of none
+ turns an entirely compile-time debugging operation into a run-time debugging
+ one. Usually, this is the opposite of what we want as C++ users. However, in light of
+ just how inscrutable the error messages that come from parser combinator libraries are,
+ using your favorite debugger to step through the stack to diagnose the problem
+ is a much faster way to fix problems.
+
![]() |
+Note | +
|---|---|
+ The example below is taken from an older version of Boost.Parser, so some + of the symbol names may be unfamiliar. However, it's a real example, and + it applies just as well to later versions of Boost.Parser. + |
+ To demonstrate the difference, I added these three lines to the end of the
+ object_init lambda in the Parsing
+ JSON example:
+
auto x = _locals(ctx); +if (x) + std::cout << "Oops! What x?"; ++
+ The parser that object_init is attached to has no locals. Here
+ is an example of how you can investigate this error at run time:
+
+
+$ gdb --args example/json ../meta/libraries.json +GNU gdb (Ubuntu 9.1-0ubuntu1) 9.1 +Copyright (C) 2020 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +Type "show copying" and "show warranty" for details. +This GDB was configured as "x86_64-linux-gnu". +Type "show configuration" for configuration details. +For bug reporting instructions, please see: +<http://www.gnu.org/software/gdb/bugs/>. +Find the GDB manual and other documentation resources online at: + <http://www.gnu.org/software/gdb/documentation/>. + +For help, type "help". +Type "apropos word" to search for commands related to "word"... +Reading symbols from example/json... +(gdb) r +Starting program: /home/tzlaine/parser/build/example/json ../meta/libraries.json +json: /home/tzlaine/parser/include/boost/parser/parser.hpp:344: void boost::parser::none::fail() const: Assertion `false' failed. + +Program received signal SIGABRT, Aborted. +__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 +50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory. +(gdb) up +#1 0x00007ffff7bdf859 in __GI_abort () at abort.c:79 +79 abort.c: No such file or directory. +(gdb) +#2 0x00007ffff7bdf729 in __assert_fail_base ( + fmt=0x7ffff7d75588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", + assertion=0x5555555f7a2c "false", + file=0x5555555f7dd8 "/home/tzlaine/parser/include/boost/parser/parser.hpp", line=344, + function=<optimized out>) at assert.c:92 +92 assert.c: No such file or directory. 
+(gdb) +#3 0x00007ffff7bf0f36 in __GI___assert_fail (assertion=0x5555555f7a2c "false", + file=0x5555555f7dd8 "/home/tzlaine/parser/include/boost/parser/parser.hpp", line=344, + function=0x5555555f7db0 "void boost::parser::none::fail() const") at assert.c:101 +101 in assert.c +(gdb) +#4 0x000055555555f99b in boost::parser::none::fail (this=0x7fffffffc2f0) + at /home/tzlaine/parser/include/boost/parser/parser.hpp:344 +344 BOOST_ASSERT(false); +(gdb) +#5 0x000055555559d380 in boost::parser::none::operator bool<bool>() const (this=0x7fffffffc2f0) + at /home/tzlaine/parser/include/boost/parser/parser.hpp:83 +83 fail(); +(gdb) +#6 0x0000555555590a6b in _ZNK4json11object_initMUlRT_E_clIKN5boost4hana6detail8map_implINS6_10hash_tableIJNS6_6bucketINS4_6parser6detail9begin_tagEJLm0EEEENS9_INSB_7end_tagEJLm1EEEENS9_INSB_8pass_tagEJLm2EEEENS9_INSB_10locals_tagEJLm3EEEENS9_INSB_15rule_params_tagEJLm4EEEENS9_INSB_11globals_tagEJLm5EEEENS9_INSB_16trace_indent_tagEJLm6EEEENS9_INSB_17error_handler_tagEJLm7EEEENS9_INSB_13callbacks_tagEJLm8EEEENS9_INSB_22symbol_table_tries_tagEJLm9EEEENS9_INSB_7val_tagEJLm10EEEENS9_INSB_8attr_tagEJLm11EEEENS9_INSB_9where_tagEJLm12EEEEEEENS5_11basic_tupleIJNS5_4pairINS5_9type_implISC_E1_ENS4_4text20utf_8_to_32_iteratorIPKcS1B_NS18_25use_replacement_characterEEEEENS14_INS15_ISE_E1_ES1D_EENS14_INS15_ISG_E1_EPbEENS14_INS15_ISI_E1_ENSB_4nopeEEENS14_INS15_ISK_E1_ES1O_EENS14_INS15_ISM_E1_EPNS_12global_stateEEENS14_INS15_ISO_E1_EPiEENS14_INS15_ISQ_E1_EPKNSA_22callback_error_handlerEEENS14_INS15_ISS_E1_ES1O_EENS14_INS15_ISU_E1_EPSt3mapIPvNS4_3anyESt4lessIS2E_ESaISt4pairIKS2E_S2F_EEEEENS14_INS15_ISW_E1_EPNS_5valueEEENS14_INS15_ISY_E1_EPS1O_EENS14_INS15_IS10_E1_EPKNSA_4viewIS1D_S1D_EEEEEEEEEEEDaS1_ (__closure=0x7fffffffc9b1, ctx=...) 
+ at /home/tzlaine/parser/example/json.cpp:103 +103 if (x) +(gdb) l +98 auto & globals = _globals(ctx); +99 if (globals.max_recursive_open_count < ++globals.recursive_open_count) +100 throw excessive_nesting(_where(ctx).begin()); +101 _val(ctx) = object(); +102 auto x = _locals(ctx); +103 if (x) +104 std::cout << "Oops! What x?"; +105 }; +106 +107 // We need object_insert because we can't just insert into the json::value +(gdb) ++
+
++ To find the problem, I just had to move up the stack, with GDB's "up" + command, until I saw that I was in my own code. Then I listed the code surrounding + the offending line, as you see above. If I were to keep going up the stack, + I would move through the exact chain of template instantiations — at + the exact lines of code where they appear — in a few seconds. +
+
+ This is how the same problem looks with BOOST_PARSER_NO_RUNTIME_ASSERTIONS
+ defined. Defining it makes the code we added ill-formed, so the problem is
+ reported as a compile-time error instead of a run-time error:
+
+
+$ make json
+Scanning dependencies of target json
+[ 50%] Building CXX object example/CMakeFiles/json.dir/json.cpp.o
+
+/home/tzlaine/parser/example/json.cpp: In instantiation of ‘json::<lambda(auto:58&)> [with auto:58 = const boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 11>, boost::hana::detail::bucket<boost::parser::detail::where_tag, 12> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::where_tag>::_, const boost::parser::view<boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >*> > >]’:
+/home/tzlaine/parser/include/boost/parser/parser.hpp:3216:24: required from ‘void boost::parser::action_parser<Parser, Action>::call(boost::hana::bool_<UseCallbacks>, Iter&, Sentinel, const Context&, const SkipParser&, boost::parser::detail::flags, bool&, Attribute&) const [with bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, 
boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >; SkipParser = boost::parser::rule<json::ws>; Attribute = boost::parser::detail::nope; Parser = boost::parser::omit_parser<boost::parser::char_parser<char, void> >; Action = json::<lambda(auto:58&)>; boost::hana::bool_<UseCallbacks> = boost::hana::integral_constant<bool, false>]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:3175:17: required from ‘boost::parser::detail::nope boost::parser::action_parser<Parser, Action>::call(boost::hana::bool_<UseCallbacks>, Iter&, Sentinel, const Context&, const SkipParser&, boost::parser::detail::flags, bool&) const [with bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, 
boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >; SkipParser = boost::parser::rule<json::ws>; Parser = boost::parser::omit_parser<boost::parser::char_parser<char, void> >; Action = json::<lambda(auto:58&)>; boost::hana::bool_<UseCallbacks> = boost::hana::integral_constant<bool, false>]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:2745:35: required from ‘auto boost::parser::seq_parser<ParserTuple, BacktrackingTuple>::dummy_use_parser_t<UseCallbacks, Iter, Sentinel, Context, SkipParser>::operator()(const Parser&) const [with Parser = boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >; bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >; SkipParser = boost::parser::rule<json::ws>; ParserTuple = boost::hana::tuple<boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >, boost::parser::opt_parser<boost::parser::delimited_seq_parser<boost::parser::action_parser<boost::parser::rule_parser<false, json::object_element, boost::hana::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, json::value>, boost::parser::detail::nope, boost::parser::detail::nope>, json::<lambda(auto:59&)> >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > > >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > >; BacktrackingTuple = boost::hana::tuple<boost::hana::integral_constant<bool, true>, boost::hana::integral_constant<bool, true>, 
boost::hana::integral_constant<bool, false> >]’
+/home/tzlaine/boost_1_71_0/boost/hana/transform.hpp:62:42: required from ‘constexpr auto boost::hana::transform_impl<S, boost::hana::when<boost::hana::Sequence<S>::value> >::transformer<F>::operator()(Xs&& ...) const [with Xs = {const boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >&, const boost::parser::opt_parser<boost::parser::delimited_seq_parser<boost::parser::action_parser<boost::parser::rule_parser<false, json::object_element, boost::hana::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, json::value>, boost::parser::detail::nope, boost::parser::detail::nope>, json::<lambda(auto:59&)> >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > > >&, const boost::parser::omit_parser<boost::parser::char_parser<char, void> >&}; F = const boost::parser::seq_parser<boost::hana::tuple<boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >, boost::parser::opt_parser<boost::parser::delimited_seq_parser<boost::parser::action_parser<boost::parser::rule_parser<false, json::object_element, boost::hana::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, json::value>, boost::parser::detail::nope, boost::parser::detail::nope>, json::<lambda(auto:59&)> >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > > >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > >, boost::hana::tuple<boost::hana::integral_constant<bool, true>, boost::hana::integral_constant<bool, true>, boost::hana::integral_constant<bool, false> > >::dummy_use_parser_t<false, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>, 
boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const 
boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >, boost::parser::rule<json::ws> >*; S = boost::hana::tuple_tag]’
+/home/tzlaine/boost_1_71_0/boost/hana/basic_tuple.hpp:115:39: required from ‘static constexpr decltype(auto) boost::hana::unpack_impl<boost::hana::basic_tuple_tag>::apply(const boost::hana::detail::basic_tuple_impl<std::integer_sequence<long unsigned int, _Idx ...>, Xn ...>&, F&&) [with long unsigned int ...i = {0, 1, 2}; Xn = {boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >, boost::parser::opt_parser<boost::parser::delimited_seq_parser<boost::parser::action_parser<boost::parser::rule_parser<false, json::object_element, boost::hana::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, json::value>, boost::parser::detail::nope, boost::parser::detail::nope>, json::<lambda(auto:59&)> >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > > >, boost::parser::omit_parser<boost::parser::char_parser<char, void> >}; F = boost::hana::transform_impl<boost::hana::tuple_tag, boost::hana::when<true> >::transformer<const boost::parser::seq_parser<boost::hana::tuple<boost::parser::action_parser<boost::parser::omit_parser<boost::parser::char_parser<char, void> >, json::<lambda(auto:58&)> >, boost::parser::opt_parser<boost::parser::delimited_seq_parser<boost::parser::action_parser<boost::parser::rule_parser<false, json::object_element, boost::hana::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, json::value>, boost::parser::detail::nope, boost::parser::detail::nope>, json::<lambda(auto:59&)> >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > > >, boost::parser::omit_parser<boost::parser::char_parser<char, void> > >, boost::hana::tuple<boost::hana::integral_constant<bool, true>, boost::hana::integral_constant<bool, true>, boost::hana::integral_constant<bool, false> > >::dummy_use_parser_t<false, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>, 
boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>, boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >, boost::parser::rule<json::ws> >*>]’
+/home/tzlaine/boost_1_71_0/boost/hana/unpack.hpp:47:29: [ skipping 20 instantiation contexts, use -ftemplate-backtrace-limit=0 to disable ]
+/home/tzlaine/parser/example/json.cpp:262:5: required from ‘void json::parse_rule(boost::parser::rule_parser<false, json::value_tag, json::value, boost::parser::detail::nope, boost::parser::detail::nope>::tag_type*, boost::hana::bool_<b>, Iter&, Sentinel, const Context&, const SkipParser&, boost::parser::detail::flags, bool&, Attribute&) [with bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, json::value*> > >; SkipParser = boost::parser::rule<json::ws>; Attribute = json::value; boost::parser::rule_parser<false, json::value_tag, json::value, boost::parser::detail::nope, boost::parser::detail::nope>::tag_type = json::value_tag; boost::hana::bool_<b> = boost::hana::integral_constant<bool, false>]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:3707:23: required from ‘boost::parser::rule_parser<false, TagType, Attribute, LocalState, ParamsTuple>::attr_type boost::parser::rule_parser<false, TagType, Attribute, LocalState, ParamsTuple>::call(boost::hana::bool_<UseCallbacks>, Iter&, Sentinel, const Context&, const SkipParser&, boost::parser::detail::flags, bool&) const [with bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*> > >; SkipParser = boost::parser::rule<json::ws>; TagType = json::value_tag; Attribute = json::value; LocalState = boost::parser::detail::nope; ParamsTuple = boost::parser::detail::nope; boost::parser::rule_parser<false, TagType, Attribute, LocalState, ParamsTuple>::attr_type = json::value; boost::hana::bool_<UseCallbacks> = boost::hana::integral_constant<bool, false>]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:4155:32: required from ‘auto boost::parser::parser_interface<Parser, GlobalState, ErrorHandler>::operator()(boost::hana::bool_<UseCallbacks>, Iter&, Sentinel, const Context&, const SkipParserType&, boost::parser::detail::flags, bool&) const [with bool UseCallbacks = false; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Context = boost::hana::detail::map_impl<boost::hana::detail::hash_table<boost::hana::detail::bucket<boost::parser::detail::begin_tag, 0>, boost::hana::detail::bucket<boost::parser::detail::end_tag, 1>, boost::hana::detail::bucket<boost::parser::detail::pass_tag, 2>, boost::hana::detail::bucket<boost::parser::detail::val_tag, 3>, boost::hana::detail::bucket<boost::parser::detail::attr_tag, 4>, boost::hana::detail::bucket<boost::parser::detail::locals_tag, 5>, boost::hana::detail::bucket<boost::parser::detail::rule_params_tag, 6>, boost::hana::detail::bucket<boost::parser::detail::globals_tag, 7>, boost::hana::detail::bucket<boost::parser::detail::trace_indent_tag, 8>, boost::hana::detail::bucket<boost::parser::detail::error_handler_tag, 9>, boost::hana::detail::bucket<boost::parser::detail::callbacks_tag, 10>, boost::hana::detail::bucket<boost::parser::detail::symbol_table_tries_tag, 11> >, boost::hana::basic_tuple<boost::hana::pair<boost::hana::type_impl<boost::parser::detail::begin_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::end_tag>::_, boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character> >, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::pass_tag>::_, bool*>, 
boost::hana::pair<boost::hana::type_impl<boost::parser::detail::val_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::attr_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::locals_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::rule_params_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::globals_tag>::_, json::global_state*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::trace_indent_tag>::_, int*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::error_handler_tag>::_, const boost::parser::callback_error_handler*>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::callbacks_tag>::_, boost::parser::detail::nope>, boost::hana::pair<boost::hana::type_impl<boost::parser::detail::symbol_table_tries_tag>::_, std::map<void*, boost::any, std::less<void*>, std::allocator<std::pair<void* const, boost::any> > >*> > >; SkipParserType = boost::parser::rule<json::ws>; Parser = boost::parser::rule_parser<false, json::value_tag, json::value, boost::parser::detail::nope, boost::parser::detail::nope>; GlobalState = json::global_state&; ErrorHandler = boost::parser::callback_error_handler&; boost::hana::bool_<UseCallbacks> = boost::hana::integral_constant<bool, false>]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:1808:43: required from ‘auto boost::parser::detail::skip_parse_impl(Iter&, Sentinel, const Parser&, const SkipParser&, const ErrorHandler&) [with bool Debug = true; Iter = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Sentinel = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Parser = boost::parser::parser_interface<boost::parser::rule_parser<false, json::value_tag, json::value, boost::parser::detail::nope, boost::parser::detail::nope>, json::global_state&, boost::parser::callback_error_handler&>; SkipParser = boost::parser::rule<json::ws>; ErrorHandler = boost::parser::callback_error_handler]’
+/home/tzlaine/parser/include/boost/parser/parser.hpp:6369:53: required from ‘auto boost::parser::parse(I&, S, const boost::parser::parser_interface<Parser, GlobalState, ErrorHandler>&, const boost::parser::rule<TagType, Attribute, LocalState, ParamsTuple>&, boost::parser::trace) [with I = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; S = boost::text::utf_8_to_32_iterator<const char*, const char*, boost::text::use_replacement_character>; Parser = boost::parser::rule_parser<false, json::value_tag, json::value, boost::parser::detail::nope, boost::parser::detail::nope>; GlobalState = json::global_state&; ErrorHandler = boost::parser::callback_error_handler&; TagType = json::ws; Attribute = boost::parser::detail::nope; LocalState = boost::parser::detail::nope; ParamsTuple = boost::parser::detail::nope]’
+/home/tzlaine/parser/example/json.cpp:313:53: required from here
+/home/tzlaine/parser/example/json.cpp:103:13: error: could not convert ‘x’ from ‘boost::parser::none’ to ‘bool’
+ 103 | if (x)
+ | ^
+ | |
+ | boost::parser::none
+make[3]: *** [example/CMakeFiles/json.dir/build.make:63: example/CMakeFiles/json.dir/json.cpp.o] Error 1
+make[2]: *** [CMakeFiles/Makefile2:1668: example/CMakeFiles/json.dir/all] Error 2
+make[1]: *** [CMakeFiles/Makefile2:1675: example/CMakeFiles/json.dir/rule] Error 2
+make: *** [Makefile:827: json] Error 2
+
++
++ Some very familiar problems should be noted here: +
+object_init may be used with multiple rules,
+ some of which have locals, and some of which do not. While it's nice that
+ the last line of the error diagnostic points us to the ill-formed use of
+ a none,
+ we don't know which parser plus semantic action
+ is the problem. With a stack trace in a debugger, we would know that in
+ a few seconds. In this case, we'd have a long slog trying to figure out
+ exactly where the problem lies.
+
+ This is how we get genericity in attribute generation. In the STL, we can use
+ multiple types of container with the algorithms because iterators act as the
+ glue that connects algorithms to containers. With attribute generation, there
+ are instead arbitrary types being constructed and inserted into containers.
+ Allowing the insertion to happen on arbitrary types that model the container
+ concept is what allows generic use of different containers.
+
+ Boost.Parser attempts to keep the rules for attribute generation simple. However,
+ there are some rules for attribute generation that only apply to character
+ types like char and char32_t. Sequences of these
+ produce a std::string attribute, while sequences of every other
+ type produce std::vectors. There are a couple of reasons for this.
+
+ First, strings and vectors are different. We know that strings are just arrays
+ of numbers, but we have a whole different type for them, std::string.
+ It has a different API, and other code that operates on text expects a string
+ instead of some other container. Arrays of characters are already considered
+ special by the standard library and common practice in C++.
+
+ Second, when you write a parser that parses multiple characters in a row, you + are typically trying to produce a string attribute, rather than a few individual + character values. When you use multiple non-character parsers in a row, you + are typically trying to produce multiple values. For instance: +
+namespace bp = boost::parser; +auto parser_1 = bp::char_ >> bp::char_ >> -bp::char_; +auto parser_2 = +(bp::char_ - ' ') >> ' ' >> +(bp::char_ - ' '); ++
+ I don't know about you, but I've rarely written a parser like parser_1
+ and wanted to produce a boost::parser::tuple<char,
+ char, std::optional<char>>. Similarly, I've rarely written
+ a parser like parser_2 and wanted a std::vector<std::string>.
+
+ Boost.Parser therefore makes the common case the default behavior, and provides
+ you with the merge[] and separate[]
+ directives to let you opt-in to generating the less-common attributes.
+
+ Consider this parser. +
+namespace bp = boost::parser; +auto parser = -(bp::char_ % ','); ++
+ Both Boost.Parser and Spirit consider the attribute type of this parser to be
+ optional<SEQ-OF<char>>. However, Spirit allows you
+ to parse that into a std::vector<char> or a std::string,
+ and Boost.Parser does not. Boost.Parser requires you to parse that into a
+ std::optional<std::string>, or change the parser to -(bp::char_
+ % ',') | bp::attr(std::string{}). In other words, Spirit considers an
+ optional of a-sequence-of-one-or-more to be equivalent to just a sequence,
+ because the empty state of the sequence represents the empty optional state.
+ Boost.Parser does not. Why the strictness?
+
+ I understand why Spirit works that way — there's no loss of information, + so why not? However, I don't agree with that approach. +
+
+ When I write operator-, I get a std::optional. That's
+ a simple rule.
+
+ If I did write operator-, I was opting in to getting a std::optional.
+ The code expresses that intent. If the library changes my written intent, there
+ better be a damn good reason.
+
+ There is of course an exception to the "simple rule" above -- if
+ I write -p1 >> *p2, and the ATTR(p1) is the
+ same as ATTR(p2), attributes are the same, the optional value
+ gets slurped up into the container. I consider this a "damn good reason",
+ because this is a very common use case. For other, less-common cases, separate[] can be used to keep the attributes
+ non-combining. So separate[-int_ >> *int_] has the attribute
+ boost::parser::tuple<std::optional<int>,
+ std::vector<int>>. This makes opting out of this exception
+ very easy, and the intent remains visible in the code.
+
+ By contrast, "I wrote -+int_ but I really want a std::vector<int>
+ instead of a std::optional<std::vector<int>>"
+ is not a really common use case.
+
+ Also, Spirit-style looseness is more complicated than parser above
+ indicates. Remember, int_ | eps and -int_ are supposed
+ to be semantically equivalent. To do otherwise would be a profound violation
+ of the principle of least surprise. So, if they're equivalent, we would need
+ to apply the same rule to int_ | eps. Also, we would probably
+ need to apply it to if_(cond)[int_], which is also a std::optional<int>.
+ This is a lot to remember, and this is complicated to implement and maintain.
+
+ I've been using Spirit 1 and later Spirit 2 since they were released. I did + not know about the particular looseness discussed here; a user pointed it out + on Github. In many years of using these libraries, I never fully learned all + the attribute-compatibility rules, and was often surprised by them. +
++ Having a small set of rules that the user can internalize is vital; if the + attribute generated is different from my expressed intent, that's a problem. + For this not to be a problem, I need to be able to understand the rules, so + I can express my intent, and not be surprised. +
+
+ At the end of a call to any of the parse()
+ overloads that take an attribute out-param (including variants like callback_parse(), etc.), the parse either succeeds
+ or fails. If the call fails, the attribute is explicitly "cleared"
+ by assigning its default-constructed value.
+
+ This is done because it's the less bad of two options. Consider the other option + first. +
+// Without explicit clearing.
+namespace bp = boost::parser;
+std::vector<int> result;
+auto b = bp::parse("3 4 c", +bp::int_, bp::ws, result);
+assert(!b);
+assert(result == std::vector<int>({3, 4}));
+
+
+ This is odd — the parse failed, but the out-param has partial results
+ in it anyway. This happens because the parser +bp::int_ only fails
+ if it cannot match bp::int_ at least once. Above, it matches
+ it twice, meaning that it succeeds (if it had failed, it would have cleared
+ its attribute). It does not know that there is nothing after it that could
+ continue the parse, nor that it is being used to do a full parse. So, the
+ over-all parse fails, but the part of the parse that fills in the out-param
+ attribute does not know to clear its attribute.
+
+ This is why the explicit clearing behavior happens at the end of parse(). This is not without its downsides,
+ though. Consider this.
+
// With explicit clearing. +namespace bp = boost::parser; +std::string str = "-42"; +int i = 3; +bool b = parse(str, bp::uint_, i); +assert(!b); +assert(i == 0); ++
+ Here, the explicit clearing replaces the previous value of 3,
+ even though the parser never touched the value! Destroying users' variables'
+ state without need may seem like a bad idea, but consider the alternative —
+ In the previous example, we had spurious values left in the out-param attribute.
+ Here, without clearing, we would have had a value left in the out-param attribute,
+ not because it was a partial result of the parse, but because the parse never
+ touched it. This is certain to be confusing, or at least surprising, behavior.
+ I deemed it better to make the failed parse case consistent, to reduce confusion.
+ The out-param attribute of type A is always equal to A()
+ if the parse fails. It is equal to whatever the parser sets it to —
+ or its previous value, if the parser does not mutate it — if the parse
+ succeeds.
+
![]() |
+Note | +
|---|---|
+ If you are familiar with Spirit 2 and/or Spirit X3, you may be interested + in this section. If you are not, and you have not read the tutorial for Boost.Parser + yet, very little of this will make sense. + |
+ Boost.Spirit + is a library that is already in Boost, and it has been around for a long time. +
++ However, it does not suit user needs in some ways. +
+_locals()
+ in More About Rules).
+ + I wanted a library that does not suffer from any of the above limitations. + It should be noted that while Spirit X3 only has a couple of flaws in the list + above, the one related to rules is a deal-breaker. The ability to write rules, + test them in isolation, and then re-use them throughout a complex parser is + essential. +
++ Though no version of Boost.Spirit + (Spirit 2 or Spirit X3) suffers from all those limitations, there also does + not exist any one version that avoids all of them. Boost.Parser does so. However, + there are a lot of great ideas in Boost.Spirit + that have been retained in Boost.Parser. Both libraries: +
+lexeme[]);
+ + Some readers have wanted a concrete example of my claim that Spirit X3's rules + do not compose well. Consider this program. +
+#include <boost/spirit/home/x3.hpp> + +#include <iostream> +#include <set> +#include <string> +#include <vector> + + +namespace x3 = boost::spirit::x3; +using ints_type = x3::rule<class ints, std::vector<int>>; +BOOST_SPIRIT_DECLARE(ints_type); + +x3::rule<class ints, std::vector<int>> ints = "ints"; +constexpr auto ints_def = x3::int_ % ','; +BOOST_SPIRIT_DEFINE(ints); + +#define FIXED_ATTRIBUTE 0 + + +int main() +{ + std::string input = "43, 42"; + auto first = input.begin(); + auto const last = input.end(); +#if FIXED_ATTRIBUTE + std::vector<int> result; +#else + std::set<int> result; +#endif + bool success = x3::phrase_parse(first, last, ints, x3::space, result); + if (success) { + // We want this to print "43 42\n". + for (auto x : result) { + std::cout << x << ' '; + } + std::cout << "\n"; + } + + return 0; +} ++
+ Leaving FIXED_ATTRIBUTE defined as
+ 0 (so that result is a std::set<int>) leads to a well-formed program
+ that prints "42 43\n"
+ instead of the desired result. The problem here is that if you feed an attribute
+ out-param to x3::phrase_parse(),
+ you get the loose-match semantics that Spirit X3 and Boost.Parser both do.
+ This is a problem, because the user explicitly asserted that the type of the
+ ints rule's attribute should
+ be std::vector<int>. In
+ my opinion, this code should be ill-formed with FIXED_ATTRIBUTE
+ == 0.
+ To make it well-formed again, the user could use ints_def
+ directly, since it does not specify an attribute type.
+
+ When the user explicitly states that a type is some fixed T,
+ a library should not ignore that. As a user of X3, I was bitten by this in
+ such a way that I considered X3 to be a nonviable option for my uses. I ran
+ into a problem that resulted from X3's ignoring one or more of my rules' attributes
+ so that it made the parse produce the wrong result, and I could see no way
+ to fix it.
+
+ When a library provides wider use cases via genericity, we generally consider
+ this a good thing. If it is too loose in its semantics, we generally say that
+ it is type-unsafe. Using rules to nail down type flexibility
+ is one way Boost.Parser tries to enable genericity where it is desired, and
+ let the user turn it off where it is not.
+
structs and classesparse() API
+ Let's look at a slightly more complicated example, even if it is still trivial.
+ Instead of taking any old chars
+ we're given, let's require some structure. Let's parse one or more doubles, separated by commas.
+
+ The Boost.Parser parser for double
+ is double_.
+ So, to parse a single double,
+ we'd just use that. If we wanted to parse two doubles
+ in a row, we'd use:
+
boost::parser::double_ >> boost::parser::double_ ++
+ operator>>
+ in this expression is the sequence-operator; read it as "followed by".
+ If we combine the sequence-operator with Kleene
+ star, we can get the parser we want by writing:
+
boost::parser::double_ >> *(',' >> boost::parser::double_) ++
+ This is a parser that matches at least one double
+ — because of the first double_ in the expression
+ above — followed by zero or more instances of a-comma-followed-by-a-double. Notice that we can use ',' directly. Though it is not a parser, operator>>
+ and the other operators defined on Boost.Parser parsers have overloads that
+ accept character/parser pairs of arguments; these operator overloads will
+ create the right parser to recognize ','.
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter a list of doubles, separated by commas. No pressure. "; + std::string input; + std::getline(std::cin, input); + + auto const result = bp::parse(input, bp::double_ >> *(',' >> bp::double_)); + + if (result) { + std::cout << "Great! It looks like you entered:\n"; + for (double x : *result) { + std::cout << x << "\n"; + } + } else { + std::cout + << "Good job! Please proceed to the recovery annex for cake.\n"; + } +} ++
+
+
+ The first example filled in an out-parameter to deliver the result of the
+ parse. This call to parse()
+ returns a result instead. As you can see, the result is contextually convertible
+ to bool, and *result is some sort of range. In fact,
+ the return type of this call to parse()
+ is std::optional<std::vector<double>>. Naturally, if the parse fails,
+ std::nullopt is returned. We'll look at how
+ Boost.Parser maps the type of the parser to the return type, or the filled
+ in out-parameter's type, a bit later.
+
![]() |
+Note | +
|---|---|
+ There's a type trait that can tell you the attribute type for a parser,
+ |
+ If I run it in a shell, this is the result: +
+$ example/trivial +Enter a list of doubles, separated by commas. No pressure. 5.6,8.9 +Great! It looks like you entered: +5.6 +8.9 +$ example/trivial +Enter a list of doubles, separated by commas. No pressure. 5.6, 8.9 +Good job! Please proceed to the recovery annex for cake. ++
+ It does not recognize "5.6, 8.9".
+ This is because it expects a comma followed immediately
+ by a double, but I inserted
+ a space after the comma. The same failure to parse would occur if I put a
+ space before the comma, or before or after the list of doubles.
+
+ One more thing: there is a much better way to write the parser above. Instead
+ of repeating the double_
+ subparser, we could have written this:
+
bp::double_ % ',' ++
+ That's semantically identical to bp::double_ >> *(',' >> bp::double_). This pattern — some bit of input
+ repeated one or more times, with a separator between each instance —
+ comes up so often that there's an operator specifically for that, operator%.
+ We'll be using that operator from now on.
+
+ Let's modify the trivial parser we just saw to ignore any spaces it might
+ find among the doubles and commas.
+ To skip whitespace wherever we find it, we can pass a skip parser
+ to our call to parse() (we don't need to touch
+ the parser passed to parse()).
+ Here, we use ws, which matches
+ any Unicode whitespace character.
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter a list of doubles, separated by commas. No pressure. "; + std::string input; + std::getline(std::cin, input); + + auto const result = bp::parse(input, bp::double_ % ',', bp::ws); + + if (result) { + std::cout << "Great! It looks like you entered:\n"; + for (double x : *result) { + std::cout << x << "\n"; + } + } else { + std::cout + << "Good job! Please proceed to the recovery annex for cake.\n"; + } +} ++
+
+
+ The skip parser, or skipper, is run between the subparsers
+ within the parser passed to parse().
+ In this case, the skipper is run before the first double
+ is parsed, before any subsequent comma or double
+ is parsed, and at the end. So, the strings "3.6,5.9"
+ and " 3.6 , \t 5.9 "
+ are parsed the same by this program.
+
+ Skipping is an important concept in Boost.Parser. You can skip anything,
+ not just whitespace; there are lots of other things you might want to skip.
+ The skipper you pass to parse()
+ can be an arbitrary parser. For example, if you write a parser for a scripting
+ language, you can write a skipper to skip whitespace, inline comments, and
+ end-of-line comments.
+
+ We'll be using skip parsers almost exclusively in the rest of the documentation. + The ability to ignore the parts of your input that you don't care about is + so convenient that parsing without skipping is a rarity in practice. +
+
+ Unless otherwise noted, all the algorithms and views are constrained very
+ much like the way the parse()
+ overloads are. The kinds of ranges, parsers, etc., that they accept are the
+ same.
+
+ As shown in The
+ parse()
+ API, the two patterns of parsing in Boost.Parser are whole-parse and
+ prefix-parse. When you want to find something in the middle of the range
+ being parsed, there's no parse
+ API for that. You can of course make a simple parser that skips everything
+ before what you're looking for.
+
namespace bp = boost::parser; +constexpr auto parser = /* ... */; +constexpr auto middle_parser = bp::omit[*(bp::char_ - parser)] >> parser; ++
+ middle_parser will skip over
+ everything, one char_ at
+ a time, as long as the next char_
+ is not the beginning of a successful match of parser.
+ After this, control passes to parser
+ itself. Ok, so that's not too hard to write. If you need to parse something
+ from the middle in order to generate attributes, this is what you should
+ use.
+
+ However, it often turns out you only need to find some subrange in the parsed
+ range. In these cases, it would be nice to turn this into a proper algorithm
+ in the pattern of the ones in std::ranges,
+ since that's more idiomatic. boost::parser::search()
+ is that algorithm. It has very similar semantics to std::ranges::search,
+ except that it searches not for a match to an exact subrange, but for a match
+ with the given parser. Like std::ranges::search(), it returns a subrange (boost::parser::subrange
+ in C++17, std::ranges::subrange in C++20 and later).
+
namespace bp = boost::parser; +auto result = bp::search("aaXYZq", bp::lit("XYZ"), bp::ws); +assert(!result.empty()); +assert(std::string_view(result.begin(), result.end() - result.begin()) == "XYZ"); ++
+ Since boost::parser::search() returns a subrange, whatever
+ parser you give it produces no attribute. I wrote bp::lit("XYZ")
+ above; if I had written bp::string("XYZ") instead, the result (and lack of std::string
+ construction) would not change.
+
+ As you can see above, one aspect of boost::parser::search()
+ differs intentionally from the conventions of the std::ranges
+ algorithms — it accepts C-style strings, treating them as if they
+ were proper ranges.
+
+ Also, boost::parser::search() knows how to accommodate
+ your iterator type. You can pass the C-style string "aaXYZq"
+ as in the example above, or "aaXYZq"
+ | bp::as_utf32,
+ or "aaXYZq" |
+ bp::as_utf8, or even "aaXYZq"
+ | bp::as_utf16,
+ and it will return a subrange whose iterators are the type that you passed
+ as input, even though internally the iterator type might be something different
+ (a UTF-8 -> UTF-32 transcoding iterator in Unicode parsing, as with all
+ the | bp::as_utfN
+ examples above). As long as you pass a range to be parsed whose value type
+ is char, char8_t,
+ char32_t, or that is adapted
+ using some combination of as_utfN
+ adaptors, this accommodation will operate correctly.
+
+ boost::parser::search() has multiple overloads.
+ You can pass a range or an iterator/sentinel pair, and you can pass a skip
+ parser or not. That's four overloads. Also, all four overloads take an optional
+ boost::parser::trace
+ parameter at the end. This is really handy for investigating why you're not
+ finding something in the input that you expected to.
+
+ boost::parser::search_all
+ creates boost::parser::search_all_views.
+ boost::parser::search_all_view
+ is a std::views-style view. It produces a range of
+ subranges. Each subrange it produces is the next match of the given parser
+ in the parsed range.
+
namespace bp = boost::parser; +auto r = "XYZaaXYZbaabaXYZXYZ" | bp::search_all(bp::lit("XYZ")); +int count = 0; +// Prints XYZ XYZ XYZ XYZ. +for (auto subrange : r) { + std::cout << std::string_view(subrange.begin(), subrange.end() - subrange.begin()) << " "; + ++count; +} +std::cout << "\n"; +assert(count == 4); ++
+ All the details called out in the subsection on boost::parser::search()
+ above apply to boost::parser::search_all: its parser produces
+ no attributes; it accepts C-style strings as if they were ranges; and it
+ knows how to get from the internally-used iterator type back to the given
+ iterator type, in typical cases.
+
+ boost::parser::search_all
+ can be called with, and boost::parser::search_all_view can be constructed
+ with, a skip parser or not, and you can always pass boost::parser::trace at the end of any of their
+ overloads.
+
+ boost::parser::split
+ creates boost::parser::split_views.
+ boost::parser::split_view
+ is a std::views-style view. It produces a range of
+ subranges of the parsed range split on matches of the given parser. You can
+ think of boost::parser::split_view
+ as being the complement of boost::parser::search_all_view, in that boost::parser::split_view
+ produces the subranges between the subranges produced by boost::parser::search_all_view. boost::parser::split_view
+ has very similar semantics to std::ranges::split_view.
+ Just like std::ranges::split_view, boost::parser::split_view will produce empty
+ ranges between the beginning/end of the parsed range and an adjacent match,
+ or between adjacent matches.
+
namespace bp = boost::parser; +auto r = "XYZaaXYZbaabaXYZXYZ" | bp::split(bp::lit("XYZ")); +int count = 0; +// Prints '' 'aa' 'baaba' '' ''. +for (auto subrange : r) { + std::cout << "'" << std::string_view(subrange.begin(), subrange.end() - subrange.begin()) << "' "; + ++count; +} +std::cout << "\n"; +assert(count == 5); ++
+ All the details called out in the subsection on boost::parser::search()
+ above apply to boost::parser::split:
+ its parser produces no attributes; it accepts C-style strings as if they
+ were ranges; and it knows how to get from the internally-used iterator type
+ back to the given iterator type, in typical cases.
+
+ boost::parser::split
+ can be called with, and boost::parser::split_view can be constructed
+ with, a skip parser or not, and you can always pass boost::parser::trace at the end of any of their
+ overloads.
+
![]() |
+Important | +
|---|---|
+ |
+ boost::parser::replace
+ creates boost::parser::replace_views.
+ boost::parser::replace_view
+ is a std::views-style view. It produces a range of
+ subranges from the parsed range r
+ and the given replacement range replacement.
+ Wherever in the parsed range a match to the given parser parser
+ is found, replacement is
+ the subrange produced. Each subrange of r
+ that does not match parser
+ is produced as a subrange as well. The subranges are produced in the order
+ in which they occur in r.
+ Unlike boost::parser::split_view,
+ boost::parser::replace_view
+ does not produce empty subranges, unless replacement
+ is empty.
+
namespace bp = boost::parser; +auto card_number = bp::int_ >> bp::repeat(3)['-' >> bp::int_]; +auto rng = "My credit card number is 1234-5678-9012-3456." | bp::replace(card_number, "XXXX-XXXX-XXXX-XXXX"); +int count = 0; +// Prints My credit card number is XXXX-XXXX-XXXX-XXXX. +for (auto subrange : rng) { + std::cout << std::string_view(subrange.begin(), subrange.end() - subrange.begin()); + ++count; +} +std::cout << "\n"; +assert(count == 3); ++
+ If the iterator types Ir
+ and Ireplacement for the
+ r and replacement
+ ranges passed are identical (as in the example above), the iterator type
+ for the subranges produced is Ir.
+ If they are different, an implementation-defined type is used for the iterator.
+ This type is the moral equivalent of a std::variant<Ir, Ireplacement>. This works as long as Ir and Ireplacement
+ are compatible. To be compatible, they must have common reference, value,
+ and rvalue reference types, as determined by std::common_type_t.
+ One advantage to this scheme is that the range of subranges represented by
+ boost::parser::replace_view
+ is easily joined back into a single range.
+
namespace bp = boost::parser; +auto card_number = bp::int_ >> bp::repeat(3)['-' >> bp::int_]; +auto rng = "My credit card number is 1234-5678-9012-3456." | bp::replace(card_number, "XXXX-XXXX-XXXX-XXXX") | std::views::join; +std::string replace_result; +for (auto ch : rng) { + replace_result.push_back(ch); +} +assert(replace_result == "My credit card number is XXXX-XXXX-XXXX-XXXX."); ++
+ Note that we could not have written std::string
+ replace_result(rng.begin(), rng.end()).
+ This is ill-formed because the std::string
+ range constructor takes two iterators of the same type, but decltype(rng.end()) is a sentinel type different from decltype(rng.begin()).
+
+ Though the ranges r and
+ replacement can both be C-style
+ strings, boost::parser::replace_view
+ must know the end of replacement
+ before it does any work. This is because the subranges produced are all common
+ ranges, and so if replacement
+ is not, a common range must be formed from it. If you expect to pass very
+ long C-style strings to boost::parser::replace and not pay to see
+ the end until the range is used, don't.
+
+ ReplacementV is constrained
+ almost exactly the same as V.
+ V must model parsable_range and std::ranges::viewable_range.
+ ReplacementV is the same,
+ except that it can also be a std::ranges::input_range,
+ whereas V must be a std::ranges::forward_range.
+
+ You may wonder what happens when you pass a UTF-N range for r, and a UTF-M range for replacement. What happens in this case
+ is silent transcoding of replacement
+ from UTF-M to UTF-N by the boost::parser::replace range adaptor. This
+ doesn't require memory allocation; boost::parser::replace just slaps | boost::parser::as_utfN
+ onto replacement. However,
+ since Boost.Parser treats char
+ ranges as unknown encoding, boost::parser::replace will not transcode
+ from char ranges. So calls like
+ this won't work:
+
char const str[] = "some text"; +char const replacement_str[] = "some text"; +namespace bp = boost::parser; +auto r = str | bp::replace(parser, replacement_str | bp::as_utf8); // Error: ill-formed! Can't mix plain-char inputs and UTF replacements. +
+ This does not work, even though char
+ and UTF-8 are the same size. If r
+ and replacement are both
+ ranges of char, everything will
+ work of course. It's just mixing char
+ and UTF-encoded ranges that does not work.
+
+ All the details called out in the subsection on boost::parser::search()
+ above apply to boost::parser::replace:
+ its parser produces no attributes; it accepts C-style strings for the r and replacement
+ parameters as if they were ranges; and it knows how to get from the internally-used
+ iterator type back to the given iterator type, in typical cases.
+
+ boost::parser::replace
+ can be called with, and boost::parser::replace_view can be constructed
+ with, a skip parser or not, and you can always pass boost::parser::trace at the end of any of their
+ overloads.
+
![]() |
+Important | +
|---|---|
+ |
![]() |
+Important | +
|---|---|
+ |
+ boost::parser::transform_replace
+ creates boost::parser::transform_replace_views. boost::parser::transform_replace_view
+ is a std::views-style view. It produces a range of
+ subranges from the parsed range r
+ and the given invocable f.
+ Wherever in the parsed range a match to the given parser parser
+ is found, let parser's attribute
+ be attr; f(std::move(attr)) is the subrange produced. Each subrange
+ of r that does not match
+ parser is produced as a subrange
+ as well. The subranges are produced in the order in which they occur in
+ r. Unlike boost::parser::split_view, boost::parser::transform_replace_view does
+ not produce empty subranges, unless f(std::move(attr)) is empty. Here is an example.
+
auto string_sum = [](std::vector<int> const & ints) { + return std::to_string(std::accumulate(ints.begin(), ints.end(), 0)); +}; + +auto rng = "There are groups of [1, 2, 3, 4, 5] in the set." | + bp::transform_replace('[' >> bp::int_ % ',' >> ']', bp::ws, string_sum); +int count = 0; +// Prints "There are groups of 15 in the set". +for (auto subrange : rng) { + for (auto ch : subrange) { + std::cout << ch; + } + ++count; +} +std::cout << "\n"; +assert(count == 3); ++
+ Let the type decltype(f(std::move(attr)))
+ be Replacement. Replacement must be a range, and must be
+ compatible with r. See the
+ description of boost::parser::replace_view's iterator compatibility
+ requirements in the section above for details.
+
+ As with boost::parser::replace,
+ boost::parser::transform_replace
+ can be flattened from a view of subranges into a view of elements by piping
+ it to std::views::join. See the section on boost::parser::replace above for an example.
+
+ Just like boost::parser::replace
+ and boost::parser::replace_view,
+ boost::parser::transform_replace
+ and boost::parser::transform_replace_view do silent
+ transcoding of the result to the appropriate UTF, if applicable. If both
+ r and f(std::move(attr)) are ranges of char,
+ or are both the same UTF, no transcoding occurs. If one of r and f(std::move(attr)) is a range of char
+ and the other is some UTF, the program is ill-formed.
+
+ boost::parser::transform_replace_view
+ will move each attribute into f;
+ f may move from the argument
+ or copy it as desired. f
+ may return an lvalue reference. If it does so, the address of the reference
+ will be taken and stored within boost::parser::transform_replace_view. Otherwise,
+ the value returned by f is
+ moved into boost::parser::transform_replace_view. In
+ either case, the value type of boost::parser::transform_replace_view is always
+ a subrange.
+
+ boost::parser::transform_replace
+ can be called with, and boost::parser::transform_replace_view can
+ be constructed with, a skip parser or not, and you can always pass boost::parser::trace
+ at the end of any of their overloads.
+
+ Frequently, you need to parse something that might have one of several forms.
+ operator|
+ is overloaded to form alternative parsers. For example:
+
namespace bp = boost::parser; +auto const parser_1 = bp::int_ | bp::eps; ++
+ parser_1 matches an integer,
+ or if that fails, it matches epsilon, the empty string.
+ This is equivalent to writing:
+
namespace bp = boost::parser; +auto const parser_2 = -bp::int_; ++
+ However, neither parser_1
+ nor parser_2 is equivalent
+ to writing this:
+
namespace bp = boost::parser; +auto const parser_3 = bp::eps | bp::int_; // Does not do what you think. ++
+ The reason is that alternative parsers try each of their subparsers, one
+ at a time, and stop on the first one that matches. Epsilon
+ matches anything, since it is zero length and consumes no input. It even
+ matches the end of input. This means that parser_3
+ is equivalent to eps
+ by itself.
+
![]() |
+Note | +
|---|---|
+ For this reason, writing |
![]() |
+Warning | +
|---|---|
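+ This kind of error is very common when eps is involved, and also very easy to detect. However, it is possible to write P1 | P2, where P1 is a prefix of P2, such as int_ | int_ >> int_, or repeat(4)[hex_digit] | repeat(8)[hex_digit]. This is almost certainly an error, but is impossible to detect in the general case — remember that rules can be separately compiled, and consider a pair of rules whose associated _def parsers are int_ and int_ >> int_, respectively. + |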
+ So far, we've seen several different types of attributes that come from different
+ parsers, int for int_,
+ boost::parser::tuple<char,
+ int>
+ for boost::parser::char_ >>
+ boost::parser::int_, etc. Let's get into how this works
+ with more rigor.
+
![]() |
+Note | +
|---|---|
+ Some parsers have no attribute at all. In the tables below, the type of
+ the attribute is listed as "None." There is a non- |
![]() |
+Warning | +
|---|---|
+ Boost.Parser assumes that all attributes are semi-regular (see |
+ You can use attribute
+ (and the associated alias, attribute_t) to determine the
+ attribute a parser would have if it were passed to parse().
+ Since at least one parser (char_) has a polymorphic attribute
+ type, attribute
+ also takes the type of the range being parsed. If a parser produces no attribute,
+ attribute
+ will produce none,
+ not void.
+
+ If you want to feed an iterator/sentinel pair to attribute, create a range from
+ it like so:
+
constexpr auto parser = /* ... */; +auto first = /* ... */; +auto const last = /* ... */; + +namespace bp = boost::parser; +// You can of course use std::ranges::subrange directly in C++20 and later. +using attr_type = bp::attribute_t<decltype(BOOST_PARSER_SUBRANGE(first, last)), decltype(parser)>; ++
+ There is no single attribute type for any parser, since a parser can be placed
+ within omit[], which makes its attribute
+ type none.
+ Therefore, attribute
+ cannot tell you what attribute your parser will produce under all circumstances;
+ it only tells you what it would produce if it were passed to parse().
+
+ This table summarizes the attributes generated for all Boost.Parser parsers. + In the table below: +
+RESOLVE()
+ is a notional macro that expands to the resolution of parse argument
+ or evaluation of a parse predicate (see The
+ Parsers And Their Uses); and
+ x and y
+ represent arbitrary objects.
+ Table 1.8. Parsers and Their Attributes
+|
+ + Parser + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+ + None. + + |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
+
+
+ Includes all the |
+
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
|
+
+ |
+
+
+ |
++ | +
+ char_
+ is a bit odd, since its attribute type is polymorphic. When you use char_
+ to parse text in the non-Unicode code path (i.e. a string of char), the attribute is char.
+ When you use the exact same char_ to parse in the Unicode-aware
+ code path, all matching is code point based, and so the attribute type is
+ the type used to represent code points, char32_t.
+ All parsing of UTF-8 falls under this case.
+
+ Here, we're parsing plain chars,
+ meaning that the parsing is in the non-Unicode code path, the attribute of
+ char_
+ is char:
+
auto result = parse("some text", boost::parser::char_); +static_assert(std::is_same_v<decltype(result), std::optional<char>>); +
+ When you parse UTF-8, the matching is done on a code point basis, so the
+ attribute type is char32_t:
+
auto result = parse("some text" | boost::parser::as_utf8, boost::parser::char_); +static_assert(std::is_same_v<decltype(result), std::optional<char32_t>>); +
+ The good news is that usually you don't parse characters individually. When
+ you parse with char_,
+ you usually parse repetitions of them, which will produce a std::string,
+ regardless of whether you're in Unicode parsing mode or not. If you do need
+ to parse individual characters, and want to lock down their attribute type,
+ you can use cp
+ and/or cu
+ to enforce a non-polymorphic attribute type.
+
+ Combining operations of course affect the generation of attributes. In the + tables below: +
+m and n
+ are parse arguments that resolve to integral values;
+ pred is a parse predicate;
+ arg0, arg1,
+ arg2, ... are parse arguments;
+ a is a semantic action;
+ and
+ p, p1,
+ p2, ... are parsers that
+ generate attributes.
+ Table 1.9. Combining Operations and Their Attributes
+|
+ + Parser + + |
+
+ + Attribute Type + + |
+
|---|---|
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
![]() |
+Important | +
|---|---|
+ In case you did not notice it above, adding a semantic action to a parser
+ erases the parser's attribute. The attribute is still available inside
+ the semantic action as |
+ There are a relatively small number of rules that define how sequence parsers + and alternative parsers' attributes are generated. (Don't worry, there are + examples below.) +
++ The attribute generation behavior of sequence parsers is conceptually pretty + simple: +
+boost::parser::tuple<T>
+ (even if T is a type
+ that means "no attribute"), the attribute becomes T.
+
+ More formally, the attribute generation algorithm works like this. For a
+ sequence parser p, let the
+ list of attribute types for the subparsers of p
+ be a0,
+ a1, a2, ...,
+ an.
+
+ We get the attribute of p
+ by evaluating a compile-time left fold operation, left-fold({a1, a2, ..., an}, tuple<a0>, OP). OP
+ is the combining operation that takes the current attribute type (initially
+ boost::parser::tuple<a0>) and the next attribute type, and returns
+ the new current attribute type. The current attribute type at the end of
+ the fold operation is the attribute type for p.
+
+ OP attempts to apply a series
+ of rules, one at a time. The rules are noted as X
+ >> Y
+ -> Z,
+ where X is the type of the
+ current attribute, Y is the
+ type of the next attribute, and Z
+ is the new current attribute type. In these rules, C<T>
+ is a container of T; none is a special type that indicates that
+ there is no attribute; T
+ is a type; CHAR is a character
+ type, either char or char32_t; and Ts... is a parameter pack of one or more types.
+ Note that T may be the special
+ type none. The current attribute
+ is always a tuple (call it Tup),
+ so the "current attribute X"
+ refers to the last element of Tup,
+ not Tup itself, except for
+ those rules that explicitly mention boost::parser::tuple<>
+ as part of X's type.
+
none >>
+ T ->
+ T
+ CHAR >> CHAR -> std::string
+ T >>
+ none ->
+ T
+ C<T> >> T
+ -> C<T>
+ T >>
+ C<T> -> C<T>
+ C<T> >> optional<T> -> C<T>
+ optional<T> >> C<T> -> C<T>
+ boost::parser::tuple<none> >>
+ T ->
+ boost::parser::tuple<T>
+ boost::parser::tuple<Ts...> >>
+ T ->
+ boost::parser::tuple<Ts..., T>
+
+ The rules that combine containers with (possibly optional) adjacent values
+ (e.g. C<T> >> optional<T>
+ -> C<T>)
+ have a special case for strings. If C<T>
+ is exactly std::string, and T
+ is either char or char32_t, the combination yields a std::string.
+
+ Again, if the final result is that the attribute is boost::parser::tuple<T>,
+ the attribute becomes T.
+
![]() |
+Note | +
|---|---|
|
+
+ What constitutes a container in the rules above is determined by the + +template<typename T> +concept container = std::ranges::common_range<T> && requires(T t) { + { t.insert(t.begin(), *t.begin()) } + -> std::same_as<std::ranges::iterator_t<T>>; +}; ++ + + |
+ The rules for alternative parsers are much simpler. For an alternative parser
+ p, let the list of attribute
+ types for the subparsers of p
+ be a0,
+ a1, a2, ...,
+ an. The attribute of p is std::variant<a0, a1,
+ a2, ..., an>, with the following steps applied:
+
none attributes
+ are left out, and if any are, the attribute is wrapped in a std::optional, like std::optional<std::variant</*...*/>>;
+ std::variant
+ template parameters <T1, T2, ... Tn> are removed; every type that appears
+ does so exactly once;
+ std::variant<T> or std::optional<std::variant<T>>, the attribute becomes instead
+ T or std::optional<T>, respectively; and
+ std::variant<> or std::optional<std::variant<>>, the result becomes none instead.
+
+ The rule for forming containers from non-containers is simple. You get a
+ vector from any of the repeating parsers, like +p, *p, repeat(3)[p], etc.
+ The value type of the vector is
+ ATTR(p).
+ Another rule for sequence containers is that a value x
+ and a container c containing
+ elements of x's type will
+ form a single container. However, x's
+ type must be exactly the same as the elements in c.
+ There is an exception to this in the special case for strings and characters
+ noted above. For instance, consider the attribute of char_
+ >> string("str"). In the non-Unicode code path, char_'s attribute type is guaranteed to
+ be char, so ATTR(char_ >> string("str")) is std::string.
+ If you are parsing UTF-8 in the Unicode code path, char_'s
+ attribute type is char32_t,
+ and the special rule makes it also produce a std::string.
+ Otherwise, ATTR(char_ >> string("str")) would be boost::parser::tuple<char32_t, std::string>.
+
+ Again, there are no special rules for combining values and containers. Every + combination results from an exact match, or falls into the string+character + special case. +
+std::string assignment
+
+ std::string can be assigned from a char. This is dumb. But, we're stuck with
+ it. When you write a parser with a char
+ attribute, and you try to parse it into a std::string,
+ you've almost certainly made a mistake. More importantly, if you write this:
+
namespace bp = boost::parser; +std::string result; +auto b = bp::parse("3", bp::int_, bp::ws, result); ++
+ ... you are even more likely to have made a mistake. Though this should work,
+ because the assignment in std::string s; s
+ = 3; is well-formed, Boost.Parser forbids it.
+ If you write parsing code like the snippet above, you will get a static assertion.
+ If you really do want to assign a float
+ or whatever to a std::string, do it in a semantic action.
+
+ In the table: a is a semantic
+ action; and p, p1, p2,
+ ... are parsers that generate attributes. Note that only >>
+ is used here; > has the exact
+ same attribute generation rules.
+
Table 1.10. Sequence and Alternative Combining Operations and Their Attributes
+|
+ + Expression + + |
+
+ + Attribute Type + + |
+
|---|---|
| + + | +
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
| + + | +
+
+ |
+
| + + | +
+
+ |
+
|
+
+ |
+
+ + None. + + |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ |
+
+ As we saw in the previous Parsing
+ into structs and classes section, if you parse two strings
+ in a row, you get two separate strings in the resulting attribute. The parser
+ from that example was this:
+
namespace bp = boost::parser; +auto employee_parser = bp::lit("employee") + >> '{' + >> bp::int_ >> ',' + >> quoted_string >> ',' + >> quoted_string >> ',' + >> bp::double_ + >> '}'; ++
+ employee_parser's attribute
+ is boost::parser::tuple<int,
+ std::string, std::string, double>.
+ The two quoted_string parsers
+ produce std::string attributes, and those attributes
+ are not combined. That is the default behavior, and it is just what we want
+ for this case; we don't want the first and last name fields to be jammed
+ together such that we can't tell where one name ends and the other begins.
+ What if we were parsing some string that consisted of a prefix and a suffix,
+ and the prefix and suffix were defined separately for reuse elsewhere?
+
namespace bp = boost::parser; +auto prefix = /* ... */; +auto suffix = /* ... */; +auto special_string = prefix >> suffix; +// Continue to use prefix and suffix to make other parsers.... ++
+ In this case, we might want to use these separate parsers, but want special_string to produce a single std::string
+ for its attribute. merge[] exists for this purpose.
+
namespace bp = boost::parser; +auto prefix = /* ... */; +auto suffix = /* ... */; +auto special_string = bp::merge[prefix >> suffix]; ++
+ merge[] only applies to sequence parsers
+ (like p1 >>
+ p2), and forces all subparsers
+ in the sequence parser to use the same variable for their attribute.
+
+ Another directive, separate[],
+ also applies only to sequence parsers, but does the opposite of merge[]. It forces all the attributes
+ produced by the subparsers of the sequence parser to stay separate, even
+ if they would have combined. For instance, consider this parser.
+
namespace bp = boost::parser; +auto string_and_char = +bp::char_('a') >> ' ' >> bp::cp; ++
+ string_and_char matches one
+ or more 'a's, followed by some
+ other character. As written above, string_and_char
+ produces a std::string, and the final character is appended
+ to the string, after all the 'a's.
+ However, if you wanted to store the final character as a separate value,
+ you would use separate[].
+
namespace bp = boost::parser; +auto string_and_char = bp::separate[+bp::char_('a') >> ' ' >> bp::cp]; ++
+ With this change, string_and_char
+ produces the attribute boost::parser::tuple<std::string, char32_t>.
+
+ As mentioned previously, merge[]
+ applies only to sequence parsers. All subparsers must have the same attribute,
+ or produce no attribute at all. At least one subparser must produce an attribute.
+ When you use merge[], you create a combining
+ group. Every parser in a combining group uses the same variable
+ for its attribute. No parser in a combining group interacts with the attributes
+ of any parsers outside of its combining group. Combining groups are disjoint;
+ merge[/*...*/]
+ >> merge[/*...*/] will produce a tuple of two attributes,
+ not one.
+
+ separate[] also applies only to sequence
+ parsers. When you use separate[],
+ you disable interaction of all the subparsers' attributes with adjacent attributes,
+ whether they are inside or outside the separate[]
+ directive; you force each subparser to have a separate attribute.
+
+ The rules for merge[] and separate[]
+ overrule the steps of the algorithm described above for combining the attributes
+ of a sequence parser. Consider an example.
+
namespace bp = boost::parser; +constexpr auto parser = + bp::char_ >> bp::merge[(bp::string("abc") >> bp::char_ >> bp::char_) >> bp::string("ghi")]; ++
+ You might think that ATTR(parser) would be bp::tuple<char,
+ std::string>.
+ It is not. The parser above does not even compile. Since we created a merge
+ group above, we disabled the default behavior in which the char_ parsers would have collapsed into
+ the string parser that preceded
+ them. Since they are all treated as separate entities, and since they have
+ different attribute types, the use of merge[]
+ is an error.
+
+ Many directives create a new parser out of the parser they are given. merge[] and separate[]
+ do not. Since they operate only on sequence parsers, all they do is create
+ a copy of the sequence parser they are given. The seq_parser template has a template
+ parameter CombiningGroups,
+ and all merge[] and separate[]
+ do is take a given seq_parser and create a copy
+ of it with a different CombiningGroups
+ template parameter. This means that merge[]
+ and separate[] can be ignored in operator>>
+ expressions much like parentheses are. Consider an example.
+
namespace bp = boost::parser; +constexpr auto parser1 = bp::separate[bp::int_ >> bp::int_] >> bp::int_; +constexpr auto parser2 = bp::lexeme[bp::int_ >> ' ' >> bp::int_] >> bp::int_; ++
+ Note that separate[] is a no-op here; it's only
+ being used this way for this example. These parsers have different attribute
+ types.
+ ATTR(parser1) is boost::parser::tuple<int,
+ int, int>. ATTR(parser2) is boost::parser::tuple<boost::parser::tuple<int,
+ int>, int>. This
+ is because bp::lexeme[]
+ wraps its given parser in a new parser. merge[]
+ does not. That's why, even though parser1
+ and parser2 look so structurally
+ similar, they have different attributes.
+
transform(f)[]
+
+ transform(f)[]
+ is a directive that transforms the attribute of a parser using the given
+ function f. For example:
+
+
+auto str_sum = [&](std::string const & s) { + int retval = 0; + for (auto ch : s) { + retval += ch - '0'; + } + return retval; +}; + +namespace bp = boost::parser; +constexpr auto parser = +bp::char_; +std::string str = "012345"; + +auto result = bp::parse(str, bp::transform(str_sum)[parser]); +assert(result); +assert(*result == 15); +static_assert(std::is_same_v<decltype(result), std::optional<int>>); ++
+
+
+ Here, we have a function str_sum
+ that we use for f. It assumes
+ each character in the given std::string
+ s is a digit, and returns
+ the sum of all the digits in s.
+ Our parser parser would normally
+ return a std::string. However, since str_sum
+ returns a different type — int
+ — that is the attribute type of the full parser, bp::transform(str_sum)[parser], as you can see from the static_assert.
+
+ As is the case with attributes all throughout Boost.Parser, the attribute
+ passed to f will be moved.
+ You can take it by const &,
+ &&, or by value.
+
+ No distinction is made between parsers with and without an attribute, because
+ there is a Regular special no-attribute type that is generated by parsers
+ with no attribute. You may therefore write something like transform(f)[eps], and Boost.Parser will happily call f with this special no-attribute type.
+
+ omit[p]
+ disables attribute generation for the parser p.
+ raw[p]
+ changes the attribute from ATTR(p)
+ to a view that indicates the subrange of the input that was matched by p. string_view[p] is just
+ like raw[p],
+ except that it produces std::basic_string_views.
+ See Directives for
+ details.
+
+ As described in the previous page, backtracking occurs when the parse attempts
+ to match the current parser P,
+ matches part of the input, but fails to match all of P.
+ The part of the input consumed during the parse of P
+ is essentially "given back".
+
+ This is necessary because P
+ may consist of subparsers, and each subparser that succeeds will try to consume
+ input, produce attributes, etc. When a later subparser fails, the parse of
+ P fails, and the input must
+ be rewound to where it was when P
+ started its parse, not where the latest matching subparser stopped.
+
+ Alternative parsers will often evaluate multiple subparsers one at a time, + advancing and then restoring the input position, until one of the subparsers + succeeds. Consider this example. +
+namespace bp = boost::parser; +auto const parser = repeat(53)[other_parser] | repeat(10)[other_parser]; ++
+ Evaluating parser means trying
+ to match other_parser 53
+ times, and if that fails, trying to match other_parser
+ 10 times. Say you parse input that matches other_parser
+ 11 times. parser will match
+ it. It will also evaluate other_parser
+ 21 times during the parse.
+
+ The attributes of the repeat(53)[other_parser]
+ and repeat(10)[other_parser] are each std::vector<ATTR(other_parser)>; let's say that ATTR(other_parser) is int.
+ The attribute of parser as
+ a whole is the same, std::vector<int>.
+ Since other_parser is busy
+ producing ints — 21 of
+ them to be exact — you may be wondering what happens to the ones produced
+ during the evaluation of repeat(53)[other_parser]
+ when it fails to find all 53 inputs. Its std::vector<int>
+ will contain 11 ints at that
+ point.
+
+ When a repeat-parser fails, and attributes are being generated, it clears
+ its container. This applies to parsers such as the ones above, but also all
+ the other repeat parsers, including ones made using operator+ or operator*.
+
+ So, at the end of a successful parse by parser
+ of 10 inputs (since the right side of the alternative only eats 10 repetitions),
+ the std::vector<int> attribute
+ of parser would contain 10
+ ints.
+
![]() |
+Note | +
|---|---|
+ Users of Boost.Spirit may be familiar with the |
+ Ok, so if parsers all try their best to match the input, and are all-or-nothing, + doesn't that leave room for all kinds of bad input to be ignored? Consider + the top-level parser from the Parsing + JSON example. +
+auto const value_p_def = + number | bp::bool_ | null | string | array_p | object_p; ++
+ What happens if I use this to parse "\""?
+ The parse tries number, fails.
+ It then tries bp::bool_, fails. Then null
+ fails too. Finally, it starts parsing string.
+ Good news, the first character is the open-quote of a JSON string. Unfortunately,
+ that's also the end of the input, so string
+ must fail too. However, we probably don't want to just give up on parsing
+ string now and try array_p, right? If the user wrote an open-quote
+ with no matching close-quote, that's not the prefix of some later alternative
+ of value_p_def; it's ill-formed
+ JSON. Here's the parser for the string
+ rule:
+
auto const string_def = bp::lexeme['"' >> *(string_char - '"') > '"']; ++
+ Notice that operator>
+ is used on the right instead of operator>>. This indicates the same sequence
+ operation as operator>>,
+ except that it also represents an expectation. If the parse before the operator>
+ succeeds, whatever comes after it must also
+ succeed. Otherwise, the top-level parse is failed, and a diagnostic is emitted.
+ It will say something like "Expected '"' here.", quoting the
+ line, with a caret pointing to the place in the input where it expected the
+ right-side match.
+
+ Choosing to use > versus
+ >> is how you indicate
+ to Boost.Parser that parse failure is or is not a hard error, respectively.
+
+ If you want to parse ASCII, using the Unicode parsing API will not actually
+ cost you anything. Your input will be parsed, char by char,
+ and compared to values that are Unicode code points (which are char32_ts).
+ One caveat is that there may be an extra branch on each char, if the input
+ is UTF-8. If your performance requirements can tolerate this, your life will
+ be much easier if you just start with Unicode and stick with it.
+
+ Starting with Unicode support and UTF-8 input will allow you to properly + handle unexpected input, like non-ASCII languages (that's most of them), + with no additional effort on your part. +
++ Treat rules as the unit of work in your parser. Write a rule, test its corners, + and then use it to build larger rules or parsers. This allows you to get + better coverage with less work, since exercising all the code paths of your + rules, one by one, keeps the combinatorial number of paths through your code + manageable. +
++ There are multiple ways to get attributes out of a parser. You can: +
+parse()
+ for the parser to fill in;
+ + All of these are fairly similar in how much effort they require, except for + the semantic action method. For the semantic action approach, you need to + have values to fill in from your parser, and keep them in scope for the duration + of the parse. +
++ It is much more straightforward, and leads to more reusable parsers, to + have the parsers produce the attributes of the parse directly as a result + of the parse. +
++ This does not mean that you should never use semantic actions. They are sometimes + necessary. However, you should default to using the other non-semantic action + methods, and only use semantic actions with a good reason. +
++ A typical error message produced by Boost.Parser will say something like, + "Expected FOO here", where FOO is some rule or parser. Give your + rules names that will read well in error messages like this. For instance, + the JSON examples have these rules: +
+bp::rule<class escape_seq, uint32_t> const escape_seq = + "\\uXXXX hexadecimal escape sequence"; +bp::rule<class escape_double_seq, uint32_t, double_escape_locals> const + escape_double_seq = "\\uXXXX hexadecimal escape sequence"; +bp::rule<class single_escaped_char, uint32_t> const single_escaped_char = + "'\"', '\\', '/', 'b', 'f', 'n', 'r', or 't'"; ++
+ Some things to note: +
+
+ - escape_seq and escape_double_seq have the same
+ name-string. To an end-user who is trying to figure out why their input failed
+ to parse, it doesn't matter which kind of result a parser rule generates.
+ They just want to know how to fix their input. For either rule, the fix is
+ the same: put a hexadecimal escape sequence there.
+
+ - single_escaped_char has a terrible-looking name. However,
+ it's not really used as a name anywhere per se. In error messages, it works
+ nicely, though. The error will be "Expected '"', '\', '/', 'b',
+ 'f', 'n', 'r', or 't' here", which is pretty helpful.
+
+ Most of these errors are found at parser construction time, so no actual + parsing is even necessary. For instance, a test case might look like this: +
+TEST(my_parser_tests, my_rule_test) {
+ my_rule r;
+}
+
+
+ In most parsing cases, being able to generate an attribute that represents
+ the result of the parse, or being able to parse into such an attribute, is
+ sufficient. Sometimes, it is not. If you need to parse a very large chunk
+ of text, the generated attribute may be too large to fit in memory. In other
+ cases, you may want to generate attributes sometimes, and not others. callback_rules
+ exist for these kinds of uses. A callback_rule is just like
+ a rule, except that it allows the rule's attribute to be returned to the
+ caller via a callback, as long as the parse is started with a call to callback_parse() instead of parse(). Within a call to parse(), a callback_rule is identical
+ to a regular rule.
+
+ For a rule with no attribute, the signature of a callback function is void (tag), where tag
+ is the tag-type used when declaring the rule. For a rule with an attribute
+ attr, the signature is void (tag, attr). For instance, with this rule:
+
boost::parser::callback_rule<struct foo_tag> foo = "foo"; ++
+ this would be an appropriate callback function: +
+void foo_callback(foo_tag) +{ + std::cout << "Parsed a 'foo'!\n"; +} ++
+ For this rule: +
+boost::parser::callback_rule<struct bar_tag, std::string> bar = "bar"; ++
+ this would be an appropriate callback function: +
+void bar_callback(bar_tag, std::string const & s) +{ + std::cout << "Parsed a 'bar' containing " << s << "!\n"; +} ++
![]() |
+Important | +
|---|---|
+ In the case of |
+ You opt into callback parsing by parsing with a call to callback_parse()
+ instead of parse(). If you use callback_rules with parse(), they're just regular rules.
+ This allows you to choose whether to do "normal" attribute-generating/attribute-assigning
+ parsing with parse(), or callback parsing with
+ callback_parse(), without rewriting much
+ parsing code, if any.
+
+ The only reason all rules
+ are not callback_rules
+ is that you may want to have some rules use callbacks within
+ a parse, and have some that do not. For instance, if you want to report the
+ attribute of callback_rule r1 via callback, r1's
+ implementation may use some rule r2
+ to generate some or all of its attribute.
+
+ See Parsing + JSON With Callbacks for an extended example of callback parsing. +
+
+ Certain overloaded operators are defined for all parsers in Boost.Parser.
+ We've already seen some of them used in this tutorial, especially operator>>,
+ operator|,
+ and operator||,
+ which are used to form sequence parsers, alternative parsers, and permutation
+ parsers, respectively.
+
+ Here are all the operators overloaded for parsers. In the tables below: +
+c is a character of type
+ char or char32_t;
+ a is a semantic action;
+ r is an object whose
+ type models parsable_range
+ (see Concepts); and
+ p, p1,
+ p2, ... are parsers.
+ ![]() |
+Note | +
|---|---|
+ Some of the expressions in this table consume no input. All parsers consume + the input they match unless otherwise stated in the table below. + |
Table 1.7. Combining Operations and Their Semantics
+|
+ + Expression + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Parses using |
+
+
+ |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Parses using |
+
+
+ |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff either |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+ + None. + + |
++ | +
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
+ There are a couple of special rules not captured in the table above: +
+
+ First, the zero-or-more and one-or-more repetitions (operator*() and operator+(), respectively) may collapse when combined.
+ For any parser p, +(+p)
+ collapses to +p;
+ **p,
+ *+p,
+ and +*p
+ each collapse to just *p.
+
+ Second, using eps
+ in an alternative parser as any alternative except
+ the last one is a common source of errors; Boost.Parser disallows it. This
+ is true because, for any parser p,
+ eps
+ | p
+ is equivalent to eps,
+ since eps
+ always matches. This is not true for eps parameterized with a condition.
+ For any condition cond,
+ eps(cond)
+ is allowed to appear anywhere within an alternative parser.
+
![]() |
+Note | +
|---|---|
+ When looking at Boost.Parser parsers in a debugger, or when looking at
+ their reference documentation, you may see reference to the template |
+ A directive is an element of your parser that doesn't have any meaning by
+ itself. Some are second-order parsers that need a first-order parser to do
+ the actual parsing. Others influence the parse in some way. You can often
+ spot a directive lexically by its use of [];
+ directives always use []. Non-directives
+ might, but only when attaching a semantic action.
+
+ The directives that are second order parsers are technically directives,
+ but since they are also used to create parsers, it is more useful just to
+ focus on that. The directives repeat()
+ and if_() were already described in
+ the section on parsers; we won't say much about them here.
+
+ Sequence, alternative, and permutation parsers do not nest in most cases.
+ (Let's consider just sequence parsers to keep things simple, but most of
+ this logic applies to alternative parsers as well.) a
+ >> b
+ >> c
+ is the same as (a
+ >> b) >> c and a
+ >> (b >> c), and
+ they are each represented by a single seq_parser with three subparsers,
+ a, b,
+ and c. However, if something
+ prevents two seq_parsers
+ from interacting directly, they will nest.
+ For instance, lexeme[a >> b] >>
+ c is a seq_parser containing two parsers,
+ lexeme[a >> b] and
+ c. This is because lexeme[] takes its given parser and
+ wraps it in a lexeme_parser. This in turn
+ turns off the sequence parser combining logic, since both sides of the second
+ operator>>
+ in lexeme[a >> b] >>
+ c are not seq_parsers. Sequence parsers
+ have several rules that govern what the overall attribute type of the parser
+ is, based on the positions and attributes of its subparsers (see Attribute
+ Generation). Therefore, it's important to know which directives create
+ a new parser (and what kind), and which ones do not; this is indicated for
+ each directive below.
+
+ See The
+ Parsers And Their Uses. Creates a repeat_parser.
+
+ See The
+ Parsers And Their Uses. Creates a seq_parser.
+
+ omit[p]
+ disables attribute generation for the parser p.
+ Not only does omit[p]
+ have no attribute, but any attribute generation work that normally happens
+ within p is skipped.
+
+ This directive can be useful in cases like this: say you have some fairly
+ complicated parser p that
+ generates a large and expensive-to-construct attribute. Now say that you
+ want to write a function that just counts how many times p
+ can match a string (where the matches are non-overlapping). Instead of using
+ p directly, and building
+ all those attributes, or rewriting p
+ without the attribute generation, use omit[].
+
+ Creates an omit_parser.
+
+ raw[p]
+ changes the attribute from ATTR(p)
+ to a view that delimits the subrange of the input that was matched by
+ p. The type of the view is
+ subrange<I>,
+ where I is the type of the
+ iterator used within the parse. Note that this may not be the same as the
+ iterator type passed to parse().
+ For instance, when parsing UTF-8, the iterator passed to parse()
+ may be char8_t const
+ *, but within the parse it will be
+ a UTF-8 to UTF-32 transcoding (converting) iterator. Just like omit[], raw[]
+ causes all attribute-generation work within p
+ to be skipped.
+
+ Similar to the re-use scenario for omit[]
+ above, raw[] could be used to find the
+ locations of all non-overlapping matches
+ of p in a string.
+
+ Creates a raw_parser.
+
+ string_view[p]
+ is very similar to raw[p], except
+ that it changes the attribute of p
+ to std::basic_string_view<C>,
+ where C is the character
+ type of the underlying range being parsed. string_view[]
+ requires that the underlying range being parsed is contiguous. Since this
+ can only be detected in C++20 and later, string_view[]
+ is not available in C++17 mode.
+
+ Similar to the re-use scenario for omit[]
+ above, string_view[] could be used to find the
+ locations of all non-overlapping matches
+ of p in a string. Whether
+ raw[] or string_view[]
+ is more natural to use to report the locations depends on your use case,
+ but they are essentially the same.
+
+ Creates a string_view_parser.
+
+ no_case[p]
+ enables case-insensitive parsing within the parse of p.
+ This applies to the text parsed by char_(),
+ string(), and bool_ parsers. The number
+ parsers are already case-insensitive. The case-insensitivity is achieved
+ by doing Unicode case folding on the text being parsed and the values in
+ the parser being matched (see note below if you want to know more about Unicode
+ case folding). In the non-Unicode code path, a full Unicode case folding
+ is not done; instead, only the transformations of values less than 0x100 are done. Examples:
+
#include <boost/parser/transcode_view.hpp> // For as_utfN. + +namespace bp = boost::parser; +auto const street_parser = bp::string(u8"Tobias Straße"); +assert(!bp::parse("Tobias Strasse" | bp::as_utf32, street_parser)); // No match. +assert(bp::parse("Tobias Strasse" | bp::as_utf32, bp::no_case[street_parser])); // Match! + +auto const alpha_parser = bp::no_case[bp::char_('a', 'z')]; +assert(bp::parse("a" | bp::as_utf32, bp::no_case[alpha_parser])); // Match! +assert(bp::parse("B" | bp::as_utf32, bp::no_case[alpha_parser])); // Match! ++
+ Everything pretty much does what you'd naively expect inside no_case[], except that the two-character
+ range version of char_ has
+ a limitation. It only compares a code point from the input to its two arguments
+ (e.g. 'a' and 'z'
+ in the example above). It does not do anything special for multi-code point
+ case folding expansions. For instance, char_(U'ß', U'ß') matches the input U"s", which makes sense, since U'ß' expands
+ to U"ss".
+ However, that same parser does not match
+ the input U"ß"!
+ In short, stick to pairs of code points that have single-code point case
+ folding expansions. If you need to support the multi-expanding code points,
+ use the other overload, like: char_(U"abcd/*...*/ß").
+
![]() |
+Note | +
|---|---|
+ Unicode case folding is an operation that makes text uniformly one case,
+ and if you do it to two bits of text |
+ Creates a no_case_parser.
+
+ lexeme[p]
+ disables use of the skipper, if a skipper is being used, within the parse
+ of p. This is useful, for
+ instance, if you want to enable skipping in most parts of your parser, but
+ disable it only in one section where it doesn't belong. If you are skipping
+ whitespace in most of your parser, but want to parse strings that may contain
+ spaces, you should use lexeme[]:
+
namespace bp = boost::parser; +auto const string_parser = bp::lexeme['"' >> *(bp::char_ - '"') >> '"']; ++
+ Without lexeme[], our string parser would correctly
+ match "foo bar", but
+ the generated attribute would be "foobar".
+
+ Creates a lexeme_parser.
+
+ skip[] is like the inverse of lexeme[]. It enables skipping in the
+ parse, even if it was not enabled before. For example, within a call to
+ parse() that uses a skipper, let's
+ say we have these parsers in use:
+
namespace bp = boost::parser; +auto const one_or_more = +bp::char_; +auto const skip_or_skip_not_there_is_no_try = bp::lexeme[bp::skip[one_or_more] >> one_or_more]; ++
+ The use of lexeme[] disables skipping, but then
+ the use of skip[] turns it back on. The net
+ result is that the first occurrence of one_or_more
+ will use the skipper passed to parse();
+ the second will not.
+
+ skip[] has another use. You can parameterize
+ skip with a different parser to change the skipper just within the scope
+ of the directive. Let's say we passed ws to parse(),
+ and we're using these parsers somewhere within that parse()
+ call:
+
namespace bp = boost::parser; +auto const zero_or_more = *bp::char_; +auto const skip_both_ways = zero_or_more >> bp::skip(bp::blank)[zero_or_more]; ++
+ The first occurrence of zero_or_more
+ will use the skipper passed to parse(),
+ which is ws;
+ the second will use blank as its skipper.
+
+ Creates a skip_parser.
+
transform(f)[]
+ These directives influence the generation of attributes. See the Attribute + Generation section for more details on them. +
+
+ merge[] and separate[]
+ create a copy of the given seq_parser.
+
+ transform(f)[]
+ creates a transform_parser.
+
+ Boost.Parser has good error reporting built into it. Consider what happens
+ when we fail to parse at an expectation point (created using operator>).
+ If I feed the parser from the Parsing
+ JSON With Callbacks example a file called sample.json containing this
+ input (note the unmatched '['):
+
+
+{
+ "key": "value",
+ "foo": [, "bar": []
+}
+
++
++ This is the error message that is printed to the terminal: +
++
+sample.json:3:12: error: Expected ']' here: + "foo": [, "bar": [] + ^ ++
+
++ That message is formatted like the diagnostics produced by Clang and GCC. + It quotes the line on which the failure occurred, and even puts a caret under + the exact position at which the parse failed. This error message is suitable + for many kinds of end-users, and interoperates well with anything that supports + Clang and/or GCC diagnostics. +
++ Most of Boost.Parser's error handlers format their diagnostics this way, + though you are not bound by that. You can make an error handler type that + does whatever you want, as long as it meets the error handler interface. +
++ The Boost.Parser error handlers are: +
+default_error_handler:
+ Produces formatted diagnostics like the one above, and prints them to
+ std::cerr. default_error_handler has
+ no associated file name, and both errors and warnings are printed
+ to std::cerr. This handler is constexpr-friendly.
+ stream_error_handler:
+ Produces formatted diagnostics. One or two streams may be used. If two
+ are used, errors go to one stream and warnings go to the other. A file
+ name can be associated with the parse; if it is, that file name will
+ appear in all diagnostics.
+ callback_error_handler:
+ Produces formatted diagnostics. Calls a callback with the diagnostic
+ message to report the diagnostic, rather than streaming out the diagnostic.
+ A file name can be associated with the parse; if it is, that file name
+ will appear in all diagnostics. This handler is useful for recording
+ the diagnostics in memory.
+ rethrow_error_handler:
+ Does nothing but re-throw any exception that it is asked to handle. Its
+ diagnose() member functions are no-ops.
+ vs_output_error_handler:
+ Directs all errors and warnings to the debugging output panel inside
+ Visual Studio. Available on Windows only. Probably does nothing useful or
+ desirable when executed outside of Visual Studio.
+
+ You can set the error handler to any of these, or one of your own, using
+ with_error_handler() (see The
+ parse()
+ API). If you do not set one, default_error_handler will
+ be used.
+
+ Boost.Parser only generates error messages like the ones in this page at
+ failed expectation points, like a > b, where you have successfully
+ parsed a, but then cannot successfully parse b.
+ This may seem limited to you. It's actually the best that we can do.
+
+ In order for error handling to happen other than at expectation points, we
+ have to know that there is no further processing that might take place. This
+ is true because Boost.Parser has P1 | P2 | ... | Pn parsers
+ ("or_parsers"). If any one of these parsers Pi
+ fails to match, it is not allowed to fail the parse — the next one
+ (Pi+1) might match. If we get to the end of the alternatives
+ of the or_parser and Pn fails, we still cannot fail the top-level
+ parse, because the or_parser might be a subparser within a parent
+ or_parser.
+
+ Ok, so what might we do? Perhaps we could at least indicate when we ran into
+ end-of-input. But we cannot, for exactly the same reason already stated.
+ For any parser P, reaching end-of-input is a failure for P,
+ but not necessarily for the whole parse.
+
+ Perhaps we could record the farthest point ever reached during the parse, + and report that at the top level, if the top level parser fails. That would + be little help without knowing which parser was active when we reached that + point. This would require some sort of repeated memory allocation, since + in Boost.Parser the progress point of the parser is stored exclusively on + the stack — by the time we fail the top-level parse, all those far-reaching + stack frames are long gone. Not the best. +
++ Worse still, knowing how far you got in the parse and which parser was active + is not very useful. Consider this. +
+namespace bp = boost::parser;
+auto a_b = bp::char_('a') >> bp::char_('b');
+auto c_b = bp::char_('c') >> bp::char_('b');
+auto result = bp::parse("acb", a_b | c_b);
+
+
+ If we reported the farthest-reaching parser and its position, it would be
+ the a_b parser, at position "cb" in the
+ input. Is this really enlightening? Was the error in the input putting the
+ 'a' at the beginning or putting the 'c' in the
+ middle? If you point the user at a_b as the parser that failed,
+ and never mention c_b, you are potentially just steering them
+ in the wrong direction.
+
+ All error messages must come from failed expectation points. Consider parsing
+ JSON. If you open a list with '[', you know that you're parsing
+ a list, and if the list is ill-formed, you'll get an error message saying
+ so. If you open an object with '{', the same thing is possible
+ — when missing the matching '}', you can tell the user,
+ "That's not an object", and this is useful feedback. The same thing
+ with a partially parsed number, etc. If the JSON parser does not build in
+ expectations like matched braces and brackets, how can Boost.Parser know
+ that a missing '}' is really a problem, and that no later parser
+ will match the input even without the '}'?
+
![]() |
+Important | +
|---|---|
+ The bottom line is that you should build expectation points into your parsers
+ using |
+ You can get access to the error handler within any semantic action by calling
+ _error_handler(ctx) (see The
+ Parse Context). Any error handler must have the following member functions:
+
+
+template<typename Context, typename Iter> +void diagnose( + diagnostic_kind kind, + std::string_view message, + Context const & context, + Iter it) const; ++
+
++
+template<typename Context> +void diagnose( + diagnostic_kind kind, + std::string_view message, + Context const & context) const; ++
+
+
+ If you call the second one, the one without the iterator parameter, it will
+ call the first with _where(context).begin()
+ as the iterator parameter. The one without the iterator is the one you will
+ use most often. The one with the explicit iterator parameter can be useful
+ in situations where you have messages that are related to each other, associated
+ with multiple locations. For instance, if you are parsing XML, you may want
+ to report that a close-tag does not match its associated open-tag by showing
+ the line where the open-tag was found. That may of course not be located
+ anywhere near _where(ctx).begin(). (A description of _globals() is below.)
+
[](auto & ctx) {
+ // Assume we have a std::vector of open tags, and another
+ // std::vector of iterators to where the open tags were parsed, in our
+ // globals.
+ if (_attr(ctx) != _globals(ctx).open_tags.back()) {
+ std::string open_tag_msg =
+ "Previous open-tag \"" + _globals(ctx).open_tags.back() + "\" here:";
+ _error_handler(ctx).diagnose(
+ boost::parser::diagnostic_kind::error,
+ open_tag_msg,
+ ctx,
+ _globals(ctx).open_tags_position.back());
+ std::string close_tag_msg =
+ "does not match close-tag \"" + _attr(ctx) + "\" here:";
+ _error_handler(ctx).diagnose(
+ boost::parser::diagnostic_kind::error,
+ close_tag_msg,
+ ctx);
+
+ // Explicitly fail the parse. Diagnostics do not affect parse success.
+ _pass(ctx) = false;
+ }
+}
+
+
+ There are also some convenience functions that make the above code a little
+ less verbose, _report_error()
+ and _report_warning():
+
[](auto & ctx) {
+ // Assume we have a std::vector of open tags, and another
+ // std::vector of iterators to where the open tags were parsed, in our
+ // globals.
+ if (_attr(ctx) != _globals(ctx).open_tags.back()) {
+ std::string open_tag_msg =
+ "Previous open-tag \"" + _globals(ctx).open_tags.back() + "\" here:";
+ _report_error(ctx, open_tag_msg, _globals(ctx).open_tag_positions.back());
+ std::string close_tag_msg =
+ "does not match close-tag \"" + _attr(ctx) + "\" here:";
+ _report_error(ctx, close_tag_msg);
+
+ // Explicitly fail the parse. Diagnostics do not affect parse success.
+ _pass(ctx) = false;
+ }
+}
+
+
+ You should use these less verbose functions almost all the time. The only
+ time you would want to use _error_handler()
+ directly is when you are using a custom error handler, and you want access
+ to some part of its interface besides diagnose().
+
+ Though there is support for reporting warnings using the functions above, + none of the error handlers supplied by Boost.Parser will ever report a warning. + Warnings are strictly for user code. +
+
+ For more information on the rest of the error handling and diagnostic API,
+ see the header reference pages for error_handling_fwd.hpp
+ and error_handling.hpp.
+
+ Creating your own error handler is pretty easy; you just need to implement + three member functions. Say you want an error handler that writes diagnostics + to a file. Here's how you might do that. +
++
+struct logging_error_handler +{ + logging_error_handler() {} + logging_error_handler(std::string_view filename) : + filename_(filename), ofs_(filename_) + { + if (!ofs_) + throw std::runtime_error("Could not open file."); + } + + // This is the function called by Boost.Parser after a parser fails the + // parse at an expectation point and throws a parse_error. It is expected + // to create a diagnostic message, and put it where it needs to go. In + // this case, we're writing it to a log file. This function returns a + // bp::error_handler_result, which is an enum with two enumerators -- fail + // and rethrow. Returning fail fails the top-level parse; returning + // rethrow just re-throws the parse_error exception that got us here in + // the first place. + template<typename Iter, typename Sentinel, template<class> class Exception> + bp::error_handler_result + operator()(Iter first, Sentinel last, Exception<Iter> const & e) const + { + bp::write_formatted_expectation_failure_error_message( + ofs_, filename_, first, last, e); + return bp::error_handler_result::fail; + } + + // This function is for users to call within a semantic action to produce + // a diagnostic. + template<typename Context, typename Iter> + void diagnose( + bp::diagnostic_kind kind, + std::string_view message, + Context const & context, + Iter it) const + { + bp::write_formatted_message( + ofs_, + filename_, + bp::_begin(context), + it, + bp::_end(context), + message); + } + + // This is just like the other overload of diagnose(), except that it + // determines the Iter parameter for the other overload by calling + // _where(ctx). + template<typename Context> + void diagnose( + bp::diagnostic_kind kind, + std::string_view message, + Context const & context) const + { + diagnose(kind, message, context, bp::_where(context).begin()); + } + + std::string filename_; + mutable std::ofstream ofs_; +}; ++
+
+
+ That's it. You just need to do the important work of the error handler in
+ its call operator, and then implement the two overloads of diagnose()
+ that it must provide for use inside semantic actions. The default implementation
+ of these is even available as the free function write_formatted_message(),
+ so you can just call that, as you see above. Here's how you might use it.
+
+
+int main() +{ + std::cout << "Enter a list of integers, separated by commas. "; + std::string input; + std::getline(std::cin, input); + + constexpr auto parser = bp::int_ >> *(',' > bp::int_); + logging_error_handler error_handler("parse.log"); + auto const result = bp::parse(input, bp::with_error_handler(parser, error_handler)); + + if (result) { + std::cout << "It looks like you entered:\n"; + for (int x : *result) { + std::cout << x << "\n"; + } + } +} ++
+
+
+ We just define a logging_error_handler, and pass it by reference
+ to with_error_handler(), which decorates the top-level
+ parser with the error handler. We could not
+ have written bp::with_error_handler(parser, logging_error_handler("parse.log")),
+ because with_error_handler()
+ does not accept rvalues. This is because the error handler eventually goes
+ into the parse context. The parse context only stores pointers and iterators,
+ keeping it cheap to copy.
+
+ If we run the example and give it the input "1,",
+ this shows up in the log file:
+
parse.log:1:2: error: Expected int_ here (end of input): +1, + ^ ++
+ Sometimes, during the writing of a parser, you make a simple mistake that
+ is diagnosed horrifyingly, due to the high number of template instantiations
+ between the line you just wrote and the point of use (usually, the call to
+ parse()). By "sometimes",
+ I mean "almost always and many, many times". Boost.Parser has a
+ workaround for situations like this. The workaround is to make the ill-formed
+ code well-formed in as many circumstances as possible, and then do a runtime
+ assert instead.
+
+ Usually, C++ programmers try whenever they can to catch mistakes as early
+ as they can. That usually means making as much bad code ill-formed as possible.
+ Counter-intuitively, this does not work well in parser combinator situations.
+ For an example of just how dramatically different these two debugging scenarios
+ can be with Boost.Parser, please see the very long discussion in the none is weird section of Rationale.
+
+ If you are morally opposed to this approach, or just hate fun, good news:
+ you can turn off the use of this technique entirely by defining BOOST_PARSER_NO_RUNTIME_ASSERTIONS.
+
+ Debugging parsers is hard. Any parser above a certain complexity level is
+ nearly impossible to debug simply by looking at the parser's code. Stepping
+ through the parse in a debugger is even worse. To provide a reasonable chance
+ of debugging your parsers, Boost.Parser has a trace mode that you can turn
+ on simply by providing an extra parameter to parse()
+ or callback_parse():
+
boost::parser::parse(input, parser, boost::parser::trace::on); ++
+ Every overload of parse()
+ and callback_parse() takes this final parameter,
+ which is defaulted to boost::parser::trace::off.
+
+ If we trace a substantial parser, we will see a lot
+ of output. Each code point of the input must be considered, one at a time,
+ to see if a certain rule matches. As an example, let's trace a parse using
+ the JSON parser from Parsing
+ JSON. The input is "null". null
+ is one of the types that a JavaScript value can have; the top-level parser
+ in the JSON parser example is:
+
auto const value_p_def = + number | bp::bool_ | null | string | array_p | object_p; ++
+ So, a JSON value can be a number, or a Boolean, a null, etc.
+ During the parse, each alternative will be tried in turn, until one is matched.
+ I picked null because it is relatively close to the beginning
+ of the value_p_def alternative parser. Even so, the output is
+ pretty huge. Let's break it down as we go:
+
+
+[begin value; input="null"] ++
+
+
+ Each parser is traced as [begin foo; ...], then the parsing
+ operations themselves, and then [end foo; ...]. The name of
+ a rule is used as its name in the begin and end
+ parts of the trace. Non-rules have a name that is similar to the way the
+ parser looked when you wrote it. Most lines will have the next few code points
+ of the input quoted, as we have here (input="null").
+
+
+[begin number | bool_ | null | string | ...; input="null"] ++
+
+
+ This shows the beginning of the parser inside
+ the rule value — the parser that actually does all the
+ work. In the example code, this parser is called value_p_def.
+ Since it isn't a rule, we have no name for it, so we show its implementation
+ in terms of subparsers. Since it is a bit long, we don't print the entire
+ thing. That's why that ellipsis is there.
+
+
+[begin number; input="null"] + [begin raw[lexeme[ >> ...]][<<action>>]; input="null"] ++
+
+
+ Now we're starting to see the real work being done. number is
+ a somewhat complicated parser that does not match "null",
+ so there's a lot to wade through when following the trace of its attempt
+ to do so. One thing to note is that, since we cannot print a name for an
+ action, we just print "<<action>>". Something
+ similar happens when we come to an attribute that we cannot print, because
+ it has no stream insertion operation. In that case, "<<unprintable-value>>"
+ is printed.
+
+
+ [begin raw[lexeme[ >> ...]]; input="null"]
+ [begin lexeme[-char_('-') >> char_('1', '9') >> ... | ... >> ...]; input="null"]
+ [begin -char_('-') >> char_('1', '9') >> *digit | char_('0') >> -(char_('.') >> ...) >> -( >> ...); input="null"]
+ [begin -char_('-'); input="null"]
+ [begin char_('-'); input="null"]
+ no match
+ [end char_('-'); input="null"]
+ matched ""
+ attribute: <<empty>>
+ [end -char_('-'); input="null"]
+ [begin char_('1', '9') >> *digit | char_('0'); input="null"]
+ [begin char_('1', '9') >> *digit; input="null"]
+ [begin char_('1', '9'); input="null"]
+ no match
+ [end char_('1', '9'); input="null"]
+ no match
+ [end char_('1', '9') >> *digit; input="null"]
+ [begin char_('0'); input="null"]
+ no match
+ [end char_('0'); input="null"]
+ no match
+ [end char_('1', '9') >> *digit | char_('0'); input="null"]
+ no match
+ [end -char_('-') >> char_('1', '9') >> *digit | char_('0') >> -(char_('.') >> ...) >> -( >> ...); input="null"]
+ no match
+ [end lexeme[-char_('-') >> char_('1', '9') >> ... | ... >> ...]; input="null"]
+ no match
+ [end raw[lexeme[ >> ...]]; input="null"]
+ no match
+ [end raw[lexeme[ >> ...]][<<action>>]; input="null"]
+ no match
+[end number; input="null"]
+[begin bool_; input="null"]
+ no match
+[end bool_; input="null"]
+
++
+
+ number and boost::parser::bool_ did not match,
+ but null will:
+
+
+[begin null; input="null"]
+ [begin "null" >> attr(null); input="null"]
+ [begin "null"; input="null"]
+ [begin string("null"); input="null"]
+ matched "null"
+ attribute:
+ [end string("null"); input=""]
+ matched "null"
+ attribute: null
+
++
+
+ Finally, this parser actually matched, and the match generated the attribute
+ null, which is a special value of the type json::value.
+ Since we were matching a string literal "null", earlier
+ there was no attribute until we reached the attr(null) parser.
+
+
+[end "null"; input=""] + [begin attr(null); input=""] + matched "" + attribute: null + [end attr(null); input=""] + matched "null" + attribute: null + [end "null" >> attr(null); input=""] + matched "null" + attribute: null + [end null; input=""] + matched "null" + attribute: null + [end number | bool_ | null | string | ...; input=""] + matched "null" + attribute: null +[end value; input=""] +-------------------- +parse succeeded +-------------------- ++
+
++ At the very end of the parse, the trace code prints out whether the top-level + parse succeeded or failed. +
++ Some things to be aware of when looking at Boost.Parser trace output: +
+p[a] forms an action_parser
+ containing the parser p and semantic action a.
+ This is essentially an implementation detail, but unfortunately the trace
+ output does not hide this from you.
+ p, the trace-name may be intentionally different
+ from the actual structure of p. For example, in the trace
+ above, you see a parser called simply "null".
+ This parser is actually boost::parser::omit[boost::parser::string("null")],
+ but what you typically write is just "null", so
+ that's the name used. There are two special cases like this: the one
+ described here for omit[string], and another for omit[char_].
+ if_(pred)[p] is described as "Equivalent
+ to eps(pred)
+ >> p". In a trace, you will not see if_;
+ you will see eps
+ and p instead.
+
+ This is just about the most minimal example of using Boost.Parser that one
+ could write. We take a string from the command line, or "World"
+ if none is given, and then we parse it:
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main(int argc, char const * argv[]) +{ + std::string input = "World"; + if (1 < argc) + input = argv[1]; + + std::string result; + bp::parse(input, *bp::char_, result); + std::cout << "Hello, " << result << "!\n"; +} ++
+
+
+ The expression *bp::char_
+ is a parser-expression. It uses one of the many parsers that Boost.Parser
+ provides: char_.
+ Like all Boost.Parser parsers, it has certain operations defined on it. In
+ this case, *bp::char_
+ is using an overloaded operator* as the C++ version of a Kleene
+ star operator. Since C++ has no postfix unary *
+ operator, we have to use the one we have, so it is used as a prefix.
+
+ So, *bp::char_
+ means "any number of characters". In other words, it really cannot
+ fail. Even an empty string will match it.
+
+ The parse operation is performed by calling the parse()
+ function, passing the parser as one of the arguments:
+
bp::parse(input, *bp::char_, result); ++
+ The arguments here are: input,
+ the range to parse; *bp::char_,
+ the parser used to do the parse; and result,
+ an out-parameter into which to put the result of the parse. Don't get too
+ caught up on this method of getting the parse result out of parse(); there are multiple ways
+ of doing so, and we'll cover all of them in subsequent sections.
+
+ Also, just ignore for now the fact that Boost.Parser somehow figured out
+ that the result type of the *bp::char_
+ parser is a std::string. There are clear rules for this
+ that we'll cover later.
+
+ The effect of this call to parse()
+ is not very interesting — since the parser we gave it cannot ever
+ fail, and because we're placing the output in the same type as the input,
+ it just copies the contents of input
+ to result.
+
+ Boost.Parser seldom allocates memory. The exceptions to this are: +
+symbols
+ allocates memory for the symbol/attribute pairs it contains. If symbols
+ are added during the parse, allocations must also occur then. The data
+ structure used by symbols is also a trie,
+ which is a node-based tree. So, lots of allocations are likely if you
+ use symbols.
+ boost::parser::trace::on to a top-level
+ parsing function, the names of parsers are allocated.
+ operator>),
+ the name of the failed parser is placed into a std::string,
+ which will usually cause an allocation.
+ string()'s attribute is a std::string, the use of which implies allocation.
+ You can avoid this allocation by explicitly using a different string
+ type for the attribute that does not allocate.
+ repeat(p) in
+ all its forms, including operator*, operator+,
+ and operator%, is std::vector<ATTR(p)>,
+ the use of which implies allocation. You can avoid this allocation by
+ explicitly using a different sequence container for the attribute that
+ does not allocate. boost::container::static_vector or C++26's
+ std::inplace_vector may be useful as such replacements.
+
+ With the exception of allocating the name of the parser that was expected
+ in a failed expectation situation, Boost.Parser does not allocate
+ unless you tell it to, by using symbols, using a particular
+ error_handler, turning on trace, or parsing into attributes that allocate.
+
+ In the earlier page about rules (Rule
+ Parsers), I described rules as being analogous to
+ functions. rules
+ are, at base, organizational. Here are the common use cases for rules.
+ Use a rule
+ if you want to:
+
+ Let's look at the use cases in detail. +
+
+ We saw in the previous section how parse()
+ is flexible in what types it will accept as attribute out-parameters. Here's
+ another example.
+
namespace bp = boost::parser; +auto result = bp::parse(input, bp::int % ',', result); ++
+ result can be one of many
+ different types. It could be std::vector<int>.
+ It could be std::set<long long>. It could be a lot of things. Often,
+ this is a very useful property; if you had to rewrite all of your parser
+ logic because you changed the desired container in some part of your attribute
+ from a std::vector to a std::deque,
+ that would be annoying. However, that flexibility comes at the cost of type
+ checking. If you want to write a parser that always
+ produces exactly a std::vector<unsigned int> and no other type,
+ you also probably want a compilation error if you accidentally pass that
+ parser a std::set<unsigned int> attribute instead. There is no way with
+ a plain parser to enforce that its attribute type may only ever be a single,
+ fixed type.
+
+ Fortunately, rules
+ allow you to write a parser that has a fixed attribute type. Every rule has
+ a specific attribute type, provided as a template parameter. If one is not
+ specified, the rule has no attribute. The fact that the attribute is a specific
+ type allows you to remove attribute flexibility. For instance, say we have
+ a rule defined like this:
+
+
+bp::rule<struct doubles, std::vector<double>> doubles = "doubles"; +auto const doubles_def = bp::double_ % ','; +BOOST_PARSER_DEFINE_RULES(doubles); ++
+
+
+ You can then use it in a call to parse(),
+ and parse() will return a std::optional<std::vector<double>>:
+
+
+auto const result = bp::parse(input, doubles, bp::ws); ++
+
+
+ If you call parse() with an attribute out-parameter,
+ it must be exactly std::vector<double>:
+
std::vector<double> vec_result; +bp::parse(input, doubles, bp::ws, vec_result); // Ok. +std::deque<double> deque_result; +bp::parse(input, doubles, bp::ws, deque_result); // Ill-formed! ++
+ If we wanted to use a std::deque<double> as the attribute type of our rule:
+
// Attribute changed to std::deque<double>. +bp::rule<struct doubles, std::deque<double>> doubles = "doubles"; +auto const doubles_def = bp::double_ % ','; +BOOST_PARSER_DEFINE_RULES(doubles); + +int main() +{ + std::deque<double> deque_result; + bp::parse(input, doubles, bp::ws, deque_result); // Ok. +} ++
+ The take-away here is that the attribute flexibility is still available,
+ but only within the rule — the parser
+ bp::double_ % ',' can parse into a std::vector<double> or a std::deque<double>, but the rule doubles
+ must parse into only the exact attribute it was declared to generate.
+
+ The reason for this is that, inside the rule parsing implementation, there + is code something like this: +
+using attr_t = ATTR(doubles_def);
+attr_t attr;
+parse(first, last, parser, attr);
+attribute_out_param = std::move(attr);
+
+
+ Where attribute_out_param
+ is the attribute out-parameter we pass to parse().
+ If that final move assignment is ill-formed, the call to parse()
+ is too.
+
+ You can also use rules to exploit attribute flexibility. Even though a rule + reduces the flexibility of attributes it can generate, the fact that it is + so easy to write a new rule means that we can use rules themselves to get + the attribute flexibility we want across our code: +
+namespace bp = boost::parser; + +// We only need to write the definition once... +auto const generic_doubles_def = bp::double_ % ','; + +bp::rule<struct vec_doubles, std::vector<double>> vec_doubles = "vec_doubles"; +auto const & vec_doubles_def = generic_doubles_def; // ... and re-use it, +BOOST_PARSER_DEFINE_RULES(vec_doubles); + +// Attribute changed to std::deque<double>. +bp::rule<struct deque_doubles, std::deque<double>> deque_doubles = "deque_doubles"; +auto const & deque_doubles_def = generic_doubles_def; // ... and re-use it again. +BOOST_PARSER_DEFINE_RULES(deque_doubles); ++
+ Now we have one of each, and we did not have to copy any parsing logic that + would have to be maintained in two places. +
++ Sometimes, you need to create a rule to enforce a certain attribute type, + but the rule's attribute is not constructible from its parser's attribute. + When that happens, you'll need to write a semantic action. +
+struct type_t +{ + type_t() = default; + explicit type_t(double x) : x_(x) {} + // etc. + + double x_; +}; + +namespace bp = boost::parser; + +auto doubles_to_type = [](auto & ctx) { + using namespace bp::literals; + _val(ctx) = type_t(_attr(ctx)[0_c] * _attr(ctx)[1_c]); +}; + +bp::rule<struct type_tag, type_t> type = "type"; +auto const type_def = (bp::double_ >> bp::double_)[doubles_to_type]; +BOOST_PARSER_DEFINE_RULES(type); ++
+ For a rule R and its parser
+ P, we do not need to write
+ such a semantic action if:
+
+ - is an
+ aggregate, and ATTR(R)
+ is a compatible tuple;
+ ATTR(P)
+ - is a
+ tuple, and ATTR(R) is a
+ compatible aggregate;
+ ATTR(P)
+ - is a
+ non-aggregate class type ATTR(R)C,
+ and is a
+ tuple whose elements can be used to construct ATTR(P)C;
+ or
+
+ - and
+ ATTR(R) are
+ compatible types.
+ ATTR(P)
+ The notion of "compatible" is defined in The
+ parse()
+ API.
+
+ Each rule
+ has associated diagnostic text that Boost.Parser can use for failures of
+ that rule. This is useful when the parse reaches a parse failure at an expectation
+ point (see Expectation
+ points). Let's say you have the following code defined somewhere.
+
namespace bp = boost::parser; + +bp::rule<struct value_tag> value = + "an integer, or a list of integers in braces"; + +auto const ints = '{' > (value % ',') > '}'; +auto const value_def = bp::int_ | ints; + +BOOST_PARSER_DEFINE_RULES(value); ++
+ Notice the two expectation points. One before (value % ','), one before
+ the final '}'. Later, you call
+ parse on some input:
+
bp::parse("{ 4, 5 a", value, bp::ws); ++
+ This runs afoul of the second expectation point, and produces output like + this: +
+1:7: error: Expected '}' here:
+{ 4, 5 a
+ ^
+
++ That's a pretty good error message. Here's what it looks like if we violate + the earlier expectation: +
+bp::parse("{ }", value, bp::ws); ++
1:2: error: Expected an integer, or a list of integers in braces % ',' here:
+{ }
+ ^
+
+
+ Not nearly as nice. The problem is that the expectation is on (value % ',').
+ So, even though we gave value
+ reasonable diagnostic text, we put the text on the wrong thing. We can introduce
+ a new rule to put the diagnostic text in the right place.
+
namespace bp = boost::parser; + +bp::rule<struct value_tag> value = + "an integer, or a list of integers in braces"; +bp::rule<struct comma_values_tag> comma_values = + "a comma-delimited list of integers"; + +auto const ints = '{' > comma_values > '}'; +auto const value_def = bp::int_ | ints; +auto const comma_values_def = (value % ','); + +BOOST_PARSER_DEFINE_RULES(value, comma_values); ++
+ Now when we call bp::parse("{ }",
+ value,
+ bp::ws) we
+ get a much better message:
+
1:2: error: Expected a comma-delimited list of integers here:
+{ }
+ ^
+
+
+ The rule
+ value might be useful elsewhere
+ in our code, perhaps in another parser. Its diagnostic text is appropriate
+ for those other potential uses.
+
+ It's pretty common to see grammars that include recursive rules. Consider + this EBNF rule for balanced parentheses: +
+<parens> ::= "" | ( "(" <parens> ")" )
+
++ We can try to write this using Boost.Parser like this: +
+namespace bp = boost::parser; +auto const parens = '(' >> parens >> ')' | bp::eps; ++
+ We had to put the bp::eps second, because Boost.Parser's parsing
+ algorithm is greedy. Otherwise, it's just a straight transliteration. Unfortunately,
+ it does not work. The code is ill-formed because you can't define a variable
+ in terms of itself. Well you can, but nothing good comes of it. If we instead
+ make the parser in terms of a forward-declared rule, it works.
+
namespace bp = boost::parser; +bp::rule<struct parens_tag> parens = "matched parentheses"; +auto const parens_def = '(' >> parens > ')' | bp::eps; +BOOST_PARSER_DEFINE_RULES(parens); ++
+ Later, if we use it to parse, it does what we want. +
+assert(bp::parse("(((())))", parens, bp::ws)); ++
+ When it fails, it even produces nice diagnostics. +
+bp::parse("(((()))", parens, bp::ws); ++
1:7: error: Expected ')' here (end of input): +(((())) + ^ ++
+ Recursive rules
+ work differently from other parsers in one way: when re-entering the rule
+ recursively, only the attribute variable (_attr(ctx)
+ in your semantic actions) is unique to that instance of the rule. All the
+ other state of the uppermost instance of that rule is shared. This includes
+ the value of the rule (_val(ctx)),
+ and the locals and parameters to the rule. In other words, _val(ctx) returns a reference to the same
+ object in every instance of a recursive rule. This is because each
+ instance of the rule needs a place to put the attribute it generates from
+ its parse. However, we only want a single return value for the uppermost
+ rule; if each instance had a separate value in _val(ctx),
+ then it would be impossible to build up the result of a recursive rule step
+ by step during the evaluation of the recursive instantiations.
+
+ Also, consider this rule: +
+namespace bp = boost::parser; +bp::rule<struct ints_tag, std::vector<int>> ints = "ints"; +auto const ints_def = bp::int_ >> ints | bp::eps; ++
+ What is the default attribute type for ints_def? It sure looks like std::optional<std::vector<int>>.
+ Inside the evaluation of ints,
+ Boost.Parser must evaluate ints_def,
+ and then produce a std::vector<int> —
+ the return type of ints —
+ from it. How? How do you turn a std::optional<std::vector<int>>
+ into a std::vector<int>? To
+ a human, it seems obvious, but the metaprogramming that properly handles
+ this simple example and the general case is certainly beyond me.
+
+ Boost.Parser has a specific semantic for what constitutes a recursive rule.
+ Each rule has a tag type associated with it, and if Boost.Parser enters a
+ rule with a certain tag Tag,
+ and the currently-evaluating rule (if there is one) also has the tag Tag, then the rule instance being entered is
+ considered to be a recursion. No other situations are considered recursion.
+ In particular, if you have rules Ra
+ and Rb, and Ra uses Rb,
+ which in turn uses Ra, the
+ second use of Ra is not considered
+ recursion. Ra and Rb are of course mutually recursive, but
+ neither is considered a "recursive rule" for purposes of getting
+ a unique value, locals, and parameters.
+
+ One of the advantages of using rules is that you can declare all your rules + up front and then use them immediately afterward. This lets you make rules + that use each other without introducing cycles: +
+namespace bp = boost::parser; + +// Assume we have some polymorphic type that can be an object/dictionary, +// array, string, or int, called `value_type`. + +bp::rule<class string, std::string> const string = "string"; +bp::rule<class object_element, bp::tuple<std::string, value_type>> const object_element = "object-element"; +bp::rule<class object, value_type> const object = "object"; +bp::rule<class array, value_type> const array = "array"; +bp::rule<class value_tag, value_type> const value = "value"; + +auto const string_def = bp::lexeme['"' >> *(bp::char_ - '"') > '"']; +auto const object_element_def = string > ':' > value; +auto const object_def = '{'_l >> -(object_element % ',') > '}'; +auto const array_def = '['_l >> -(value % ',') > ']'; +auto const value_def = bp::int_ | bp::bool_ | string | array | object; + +BOOST_PARSER_DEFINE_RULES(string, object_element, object, array, value); ++
+ Here we have a parser for a Javascript-value-like type value_type.
+ value_type may be an array,
+ which itself may contain other arrays, objects, strings, etc. Since we need
+ to be able to parse objects within arrays and vice versa, we need each of
+ those two parsers to be able to refer to each other.
+
+ Only rules
+ can be callback parsers, so if you want to get attributes supplied to you
+ via callbacks instead of somewhere in the middle of a giant attribute that
+ represents the whole parse result, you need to use rules. See Parsing
+ JSON With Callbacks for an extended example of callback parsing.
+
+ Inside all of a rule's semantic actions, the expression _val(ctx)
+ is a reference to the attribute that the rule generates. This can be useful
+ when you want subparsers to build up the attribute in a specific way:
+
namespace bp = boost::parser; +using namespace bp::literals; + +bp::rule<class ints, std::vector<int>> const ints = "ints"; +auto twenty_zeros = [](auto & ctx) { _val(ctx).resize(20, 0); }; +auto push_back = [](auto & ctx) { _val(ctx).push_back(_attr(ctx)); }; +auto const ints_def = "20-zeros"_l[twenty_zeros] | +bp::int_[push_back]; +BOOST_PARSER_DEFINE_RULES(ints); ++
![]() |
+Tip | +
|---|---|
+ That's just an example. It's almost always better to do things without
+ using semantic actions. We could have instead written |
+ The rule
+ template takes another template parameter we have not discussed yet. You
+ can pass a third parameter LocalState
+ to rule,
+ which will be default constructed by the rule, and made available within
+ semantic actions used in the rule as _locals(ctx). This
+ gives your rule some local state, if it needs it. The type of LocalState can be anything regular. It
+ could be a single value, a struct containing multiple values, or a tuple,
+ among others.
+
struct foo_locals +{ + char first_value = 0; +}; + +namespace bp = boost::parser; + +bp::rule<class foo, int, foo_locals> const foo = "foo"; + +auto record_first = [](auto & ctx) { _locals(ctx).first_value = _attr(ctx); } +auto check_against_first = [](auto & ctx) { + char const first = _locals(ctx).first_value; + char const attr = _attr(ctx); + if (attr == first) + _pass(ctx) = false; + _val(ctx) = (int(first) << 8) | int(attr); +}; + +auto const foo_def = bp::cu[record_first] >> bp::cu[check_against_first]; +BOOST_PARSER_DEFINE_RULES(foo); ++
+ foo matches the input if
+ it can match two elements of the input in a row, but only if they are not
+ the same value. Without locals, it's a lot harder to write parsers that have
+ to track state as they parse.
+
+ Sometimes, it is convenient to parameterize parsers. Consider these parsing + rules from the YAML 1.2 + spec: +
+[80] +s-separate(n,BLOCK-OUT) ::= s-separate-lines(n) +s-separate(n,BLOCK-IN) ::= s-separate-lines(n) +s-separate(n,FLOW-OUT) ::= s-separate-lines(n) +s-separate(n,FLOW-IN) ::= s-separate-lines(n) +s-separate(n,BLOCK-KEY) ::= s-separate-in-line +s-separate(n,FLOW-KEY) ::= s-separate-in-line + +[136] +in-flow(n,FLOW-OUT) ::= ns-s-flow-seq-entries(n,FLOW-IN) +in-flow(n,FLOW-IN) ::= ns-s-flow-seq-entries(n,FLOW-IN) +in-flow(n,BLOCK-KEY) ::= ns-s-flow-seq-entries(n,FLOW-KEY) +in-flow(n,FLOW-KEY) ::= ns-s-flow-seq-entries(n,FLOW-KEY) + +[137] +c-flow-sequence(n,c) ::= “[” s-separate(n,c)? in-flow(c)? “]” + ++
+ YAML [137] says that the parsing should proceed into two YAML subrules, both
+ of which have these n and
+ c parameters. It is certainly
+ possible to transliterate these YAML parsing rules to something that uses
+ unparameterized Boost.Parser rules, but it is quite painful
+ to do so. It is better to use a parameterized rule.
+
+ You give parameters to a rule by calling its with()
+ member. The values you pass to with() are used to create a boost::parser::tuple that is available in
+ semantic actions attached to the rule, using _params(ctx).
+
+ Passing parameters to rules like this allows you
+ to easily write parsers that change the way they parse depending on contextual
+ data that they have already parsed.
+
+ Here is an implementation of YAML [137]. It also implements the two YAML
+ rules used directly by [137], rules [136] and [80]. The rules that those rules use are also represented below, but are
+ implemented using only eps, so that I don't have
+ to repeat too much of the (very large) YAML spec.
+
+
+namespace bp = boost::parser; + +// A type to represent the YAML parse context. +enum class context { + block_in, + block_out, + block_key, + flow_in, + flow_out, + flow_key +}; + +// A YAML value; no need to fill it in for this example. +struct value +{ + // ... +}; + +// YAML [66], just stubbed in here. +auto const s_separate_in_line = bp::eps; + +// YAML [137]. +bp::rule<struct c_flow_seq_tag, value> c_flow_sequence = "c-flow-sequence"; +// YAML [80]. +bp::rule<struct s_separate_tag> s_separate = "s-separate"; +// YAML [136]. +bp::rule<struct in_flow_tag, value> in_flow = "in-flow"; +// YAML [138]; just eps below. +bp::rule<struct ns_s_flow_seq_entries_tag, value> ns_s_flow_seq_entries = + "ns-s-flow-seq-entries"; +// YAML [81]; just eps below. +bp::rule<struct s_separate_lines_tag> s_separate_lines = "s-separate-lines"; + +// Parser for YAML [137]. +auto const c_flow_sequence_def = + '[' >> + -s_separate.with(bp::_p<0>, bp::_p<1>) >> + -in_flow.with(bp::_p<0>, bp::_p<1>) >> + ']'; +// Parser for YAML [80]. +auto const s_separate_def = bp::switch_(bp::_p<1>) + (context::block_out, s_separate_lines.with(bp::_p<0>)) + (context::block_in, s_separate_lines.with(bp::_p<0>)) + (context::flow_out, s_separate_lines.with(bp::_p<0>)) + (context::flow_in, s_separate_lines.with(bp::_p<0>)) + (context::block_key, s_separate_in_line) + (context::flow_key, s_separate_in_line); +// Parser for YAML [136]. 
+auto const in_flow_def = bp::switch_(bp::_p<1>) + (context::flow_out, ns_s_flow_seq_entries.with(bp::_p<0>, context::flow_in)) + (context::flow_in, ns_s_flow_seq_entries.with(bp::_p<0>, context::flow_in)) + (context::block_out, ns_s_flow_seq_entries.with(bp::_p<0>, context::flow_key)) + (context::flow_key, ns_s_flow_seq_entries.with(bp::_p<0>, context::flow_key)); + +auto const ns_s_flow_seq_entries_def = bp::eps; +auto const s_separate_lines_def = bp::eps; + +BOOST_PARSER_DEFINE_RULES( + c_flow_sequence, + s_separate, + in_flow, + ns_s_flow_seq_entries, + s_separate_lines); ++
+
+
+ YAML [137] (c_flow_sequence)
+ parses a list. The list may be empty, and must be surrounded by brackets,
+ as you see here. But, depending on the current YAML context (the c parameter to [137]), we may require certain
+ spacing to be matched by s-separate,
+ and how sub-parser in-flow behaves also depends on the current
+ context.
+
+ In s_separate above, we parse
+ differently based on the value of c.
+ This is done above by using the value of the second parameter to s_separate in a switch-parser. The second
+ parameter is looked up by using _p as a parse argument.
+
+ in_flow does something similar.
+ Note that in_flow calls its
+ subrule by passing its first parameter, but using a fixed value for the second
+ value. s_separate only passes
+ its n parameter conditionally.
+ The point is that a rule can be used with and without .with(),
+ and that you can pass constants or parse arguments to .with().
+
+ With those rules defined, we could write a unit test for YAML [137] like + this: +
++
+auto const test_parser = c_flow_sequence.with(4, context::block_out); +auto result = bp::parse("[]", test_parser); +assert(result); ++
+
+
+ You could extend this with tests for different values of n
+ and c. Obviously, in real
+ tests, you would parse actual contents inside the "[]",
+ if the other rules were implemented, like [138].
+
+ Getting at one of a rule's arguments and passing it as an argument to another
+ parser can be very verbose. _p is a variable template
+ that allows you to refer to the nth
+ argument to the current rule, so that you can, in turn, pass it to one of
+ the rule's subparsers. Using this, foo_def
+ above can be rewritten as:
+
auto const foo_def = bp::repeat(bp::_p<0>)[' '_l]; ++
+ Using _p
+ can prevent you from having to write a bunch of lambdas that each get
+ an argument out of the parse context using _params(ctx)[0_c] or
+ similar.
+
+ Note that _p
+ is a parse argument (see The
+ Parsers And Their Uses), meaning that it is an invocable that takes
+ the context as its only parameter. If you want to use it inside a semantic
+ action, you have to call it.
+
+ Semantic actions in this tutorial are usually of the signature void (auto
+ & ctx). That is, they take a context by reference,
+ and return nothing. If they were to return something, that something would
+ just get dropped on the floor.
+
+ It is a pretty common pattern to create a rule in order to get a certain
+ kind of value out of a parser, when you don't normally get it automatically.
+ If I want to parse an int,
+ int_
+ does that, and the thing that I parsed is also the desired attribute. If
+ I parse an int followed by a
+ double, I get a boost::parser::tuple containing one of each.
+ But what if I don't want those two values, but some function of those two
+ values? I probably write something like this.
+
struct obj_t { /* ... */ }; +obj_t to_obj(int i, double d) { /* ... */ } + +namespace bp = boost::parser; +bp::rule<struct obj_tag, obj_t> obj = "obj"; +auto make_obj = [](auto & ctx) { + using boost::hana::literals; + _val(ctx) = to_obj(_attr(ctx)[0_c], _attr(ctx)[1_c]); +}; +constexpr auto obj_def = (bp::int_ >> bp::double_)[make_obj]; ++
+ That's fine, if a little verbose. However, you can also do this instead: +
+namespace bp = boost::parser; +bp::rule<struct obj_tag, obj_t> obj = "obj"; +auto make_obj = [](auto & ctx) { + using boost::hana::literals; + return to_obj(_attr(ctx)[0_c], _attr(ctx)[1_c]); +}; +constexpr auto obj_def = (bp::int_ >> bp::double_)[make_obj]; ++
+ Above, we return the value from a semantic action, and the returned value
+ gets assigned to _val(ctx).
+
+ Finally, you can provide a function that takes the individual elements of
+ the attribute (if it's a tuple), and returns the value to assign to _val(ctx):
+
namespace bp = boost::parser; +bp::rule<struct obj_tag, obj_t> obj = "obj"; +constexpr auto obj_def = (bp::int_ >> bp::double_)[to_obj]; ++
+ More formally, within a rule, the use of a semantic action is determined
+ as follows. Assume we have a function APPLY
+ that calls a function with the elements of a tuple, like std::apply.
+ For some context ctx, semantic
+ action action, and attribute
+ attr, action
+ is used like this:
+
+ - _val(ctx) =
+ APPLY(action, std::move(attr)),
+ if that is well-formed, and attr
+ is a tuple of size 2 or larger;
+
+ - otherwise, _val(ctx) =
+ action(ctx), if
+ that is well-formed;
+
+ - otherwise, action(ctx).
+
+ The first case does not pass the context to the action at all. The last case + is the normal use of semantic actions outside of rules. +
++ The previous example showed how to use a symbol table as a fixed lookup table. + What if we want to add things to the table during the parse? We can do that, + but we need to do so within a semantic action. First, here is our symbol + table, already with a single value in it: +
++
+bp::symbols<int> const symbols = {{"c", 8}}; +assert(parse("c", symbols)); ++
+
++ No surprise that it works to use the symbol table as a parser to parse the + one string in the symbol table. Now, here's our parser: +
++
+auto const parser = (bp::char_ >> bp::int_)[add_symbol] >> symbols; ++
+
+
+ Here, we've attached the semantic action not to a simple parser like double_,
+ but to the sequence parser (bp::char_
+ >> bp::int_). This sequence parser contains two parsers,
+ each with its own attribute, so it produces two attributes as a tuple.
+
+
+auto const add_symbol = [&symbols](auto & ctx) { + using namespace bp::literals; + // symbols::insert() requires a string, not a single character. + char chars[2] = {_attr(ctx)[0_c], 0}; + symbols.insert(ctx, chars, _attr(ctx)[1_c]); +}; ++
+
+
+ Inside the semantic action, we can get the first element of the attribute
+ tuple using UDLs
+ provided by Boost.Hana, and boost::hana::tuple::operator[](). The first attribute, from the char_,
+ is _attr(ctx)[0_c], and
+ the second, from the int_, is _attr(ctx)[1_c]
+ (if boost::parser::tuple
+ aliases to std::tuple, you'd use std::get or
+ boost::parser::get
+ instead). To add the symbol to the symbol table, we call insert().
+
+
+auto const parser = (bp::char_ >> bp::int_)[add_symbol] >> symbols; ++
+
+
+ During the parse, ("X", 9)
+ is parsed and added to the symbol table. Then, the second 'X'
+ is recognized by the symbol table parser. However:
+
+
+assert(!parse("X", symbols)); ++
+
+
+ If we parse again, we find that "X"
+ did not stay in the symbol table. The fact that symbols
+ was declared const might have given you a hint that this would happen.
+
+ The full program: +
++
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + bp::symbols<int> const symbols = {{"c", 8}}; + assert(parse("c", symbols)); + + auto const add_symbol = [&symbols](auto & ctx) { + using namespace bp::literals; + // symbols::insert() requires a string, not a single character. + char chars[2] = {_attr(ctx)[0_c], 0}; + symbols.insert(ctx, chars, _attr(ctx)[1_c]); + }; + auto const parser = (bp::char_ >> bp::int_)[add_symbol] >> symbols; + + auto const result = parse("X 9 X", parser, bp::ws); + assert(result && *result == 9); + (void)result; + + assert(!parse("X", symbols)); +} ++
+
+![]() |
+Important | +
|---|---|
+ |
+ It is possible to add symbols to a symbols permanently. To do
+ so, you have to use a mutable symbols object s, and add the symbols by calling s.insert_for_next_parse(), instead of s.insert(). These two operations are orthogonal, so
+ if you want to both add a symbol to the table for the current top-level parse,
+ and leave it in the table for subsequent top-level parses, you need to call
+ both functions.
+
+ It is also possible to erase a single entry from the symbol table, or to
+ clear the symbol table entirely. Just as with insertion, there are versions
+ of erase and clear for the current parse, and versions that apply only to
+ subsequent parses. The full set of operations can be found in the symbols
+ API docs.
+
+ Note: there are two versions of each of the symbols *_for_next_parse()
+ functions — one that takes a context, and one that does not. The one
+ with the context is meant to be used within a semantic action. The one without
+ the context is for use outside of any parse.
+
+ Now that you've seen some examples, let's see how parsing works in a bit + more detail. Consider this example. +
+namespace bp = boost::parser; +auto int_pair = bp::int_ >> bp::int_; // Attribute: tuple<int, int> +auto int_pairs_plus = +int_pair >> bp::int_; // Attribute: tuple<std::vector<tuple<int, int>>, int> ++
+ int_pairs_plus must match
+ a pair of ints (using int_pair) one or more times, and then must
+ match an additional int. In
+ other words, it matches any odd number (greater than 1) of ints in the input. Let's look at how this
+ parse proceeds.
+
auto result = bp::parse("1 2 3", int_pairs_plus, bp::ws); ++
+ At the beginning of the parse, the top level parser uses its first subparser
+ (if any) to start parsing. So, int_pairs_plus,
+ being a sequence parser, would pass control to its first parser +int_pair.
+ Then +int_pair
+ would use int_pair to do
+ its parsing, which would in turn use bp::int_.
+ This creates a stack of parsers, each one using a particular subparser.
+
+ Step 1) The input is "1 2 3",
+ and the stack of active parsers is int_pairs_plus
+ -> +int_pair
+ -> int_pair -> bp::int_.
+ (Read "->" as "uses".) This parses "1",
+ and the whitespace after is skipped by bp::ws. Control
+ passes to the second bp::int_ parser in int_pair.
+
+ Step 2) The input is "2 3"
+ and the stack of parsers looks the same, except the active parser is the
+ second bp::int_ from int_pair.
+ This parser consumes "2"
+ and then bp::ws skips the subsequent space. Since we've
+ finished with int_pair's
+ match, its boost::parser::tuple<int,
+ int>
+ attribute is complete. Its parent is +int_pair, so this tuple attribute is pushed
+ onto the back of +int_pair's
+ attribute, which is a std::vector<boost::parser::tuple<int, int>>. Control passes up to the parent
+ of int_pair, +int_pair.
+ Since +int_pair
+ is a one-or-more parser, it starts a new iteration; control passes to int_pair again.
+
+ Step 3) The input is "3"
+ and the stack of parsers looks the same, except the active parser is the
+ first bp::int_ from int_pair
+ again, and we're in the second iteration of +int_pair. This parser consumes "3". Since this is the end of the
+ input, the second bp::int_ of int_pair
+ does not match. This partial match of "3"
+ should not count, since it was not part of a full match. So, int_pair indicates its failure, and +int_pair
+ stops iterating. Since it did match once, +int_pair does not fail; it is a one-or-more
+ parser; failure of its subparser after the first success does not cause it
+ to fail. Control passes to the next parser in sequence within int_pairs_plus.
+
+ Step 4) The input is "3"
+ again, and the stack of parsers is int_pairs_plus
+ -> bp::int_. This parses the "3",
+ and the parse reaches the end of input. Control passes to int_pairs_plus,
+ which has just successfully matched with all parsers in its sequence. It then
+ produces its attribute, a boost::parser::tuple<std::vector<boost::parser::tuple<int, int>>, int>, which gets returned from bp::parse().
+
++ Something to take note of between Steps #3 and #4: at the beginning of #4, + the input position had returned to where it was at the beginning of #3. This + kind of backtracking happens in alternative parsers when an alternative fails. + The next page has more details on the semantics of backtracking. +
+
+ So far, parsers have been presented as somewhat abstract entities. You may
+ be wanting more detail. A Boost.Parser parser P
+ is an invocable object with a pair of call operator overloads. The two functions
+ are very similar, and in many parsers one is implemented in terms of the
+ other. The first function does the parsing and returns the default attribute
+ for the parser. The second function does exactly the same parsing, but takes
+ an out-param into which it writes the attribute for the parser. The out-param
+ does not need to be the same type as the default attribute, but they need
+ to be compatible.
+
+ Compatibility means that the default attribute is assignable to the out-param
+ in some fashion. This usually means direct assignment, but it may also mean
+ a tuple -> aggregate or aggregate -> tuple conversion. For sequence
+ types, compatibility means that the sequence type has insert
+ or push_back with the usual
+ semantics. This means that the parser +boost::parser::int_ can fill a std::set<int> just
+ as well as a std::vector<int>.
+
+ Some parsers also have additional state that is required to perform a match.
+ For instance, char_ parsers
+ can be parameterized with a single code point to match; the exact value of
+ that code point is stored in the parser object.
+
+ No parser has direct support for all the operations defined on parsers (operator|,
+ operator>>,
+ etc.). Instead, there is a template called parser_interface that supports
+ all of these operations. parser_interface wraps each
+ parser, storing it as a data member, adapting it for general use. You should
+ only ever see parser_interface in the debugger,
+ or possibly in some of the reference documentation. You should never have
+ to write it in your own code.
+
+ So far, we've seen only simple parsers that parse the same value repeatedly + (with or without commas and spaces). It's also very common to parse a few + values in a specific sequence. Let's say you want to parse an employee record. + Here's a parser you might write: +
+namespace bp = boost::parser; +auto employee_parser = bp::lit("employee") + >> '{' + >> bp::int_ >> ',' + >> quoted_string >> ',' + >> quoted_string >> ',' + >> bp::double_ + >> '}'; ++
+ The attribute type for employee_parser
+ is boost::parser::tuple<int,
+ std::string, std::string, double>.
+ That's great, in that you got all the parsed data for the record without
+ having to write any semantic actions. It's not so great that you now have
+ to get all the individual elements out by their indices, using get().
+ It would be much nicer to parse into the final data structure that your program
+ is going to use. This is often some struct
+ or class. Boost.Parser supports
+ parsing into arbitrary aggregate structs,
+ and non-aggregates that are constructible from the tuple at hand.
+
+ If we have a struct that has
+ data members of the same types listed in the boost::parser::tuple attribute type for employee_parser, it would be nice to parse
+ directly into it, instead of parsing into a tuple and then constructing our
+ struct later. Fortunately, this
+ just works in Boost.Parser. Here is an example of parsing straight into a
+ compatible aggregate type.
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +struct employee +{ + int age; + std::string surname; + std::string forename; + double salary; +}; + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter employee record. "; + std::string input; + std::getline(std::cin, input); + + auto quoted_string = bp::lexeme['"' >> +(bp::char_ - '"') >> '"']; + auto employee_p = bp::lit("employee") + >> '{' + >> bp::int_ >> ',' + >> quoted_string >> ',' + >> quoted_string >> ',' + >> bp::double_ + >> '}'; + + employee record; + auto const result = bp::parse(input, employee_p, bp::ws, record); + + if (result) { + std::cout << "You entered:\nage: " << record.age + << "\nsurname: " << record.surname + << "\nforename: " << record.forename + << "\nsalary : " << record.salary << "\n"; + } else { + std::cout << "Parse failure.\n"; + } +} ++
+
+
+ Unfortunately, this is taking advantage of the loose attribute assignment
+ logic; the employee_parser
+ parser still has a boost::parser::tuple
+ attribute. See The
+ parse()
+ API for a description of attribute out-param compatibility.
+
+ For this reason, it's even more common to want to make a rule that returns
+ a specific type like employee.
+ Just by giving the rule a struct
+ type, we make sure that this parser always generates an employee
+ struct as its attribute, no matter where it is in the parse. If we made a
+ simple parser P that uses
+ the employee_p rule, like
+ bp::int_ >> employee_p, P's
+ attribute type would be boost::parser::tuple<int, employee>.
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +struct employee +{ + int age; + std::string surname; + std::string forename; + double salary; +}; + +namespace bp = boost::parser; + +bp::rule<struct quoted_string, std::string> quoted_string = "quoted name"; +bp::rule<struct employee_p, employee> employee_p = "employee"; + +auto quoted_string_def = bp::lexeme['"' >> +(bp::char_ - '"') >> '"']; +auto employee_p_def = bp::lit("employee") + >> '{' + >> bp::int_ >> ',' + >> quoted_string >> ',' + >> quoted_string >> ',' + >> bp::double_ + >> '}'; + +BOOST_PARSER_DEFINE_RULES(quoted_string, employee_p); + +int main() +{ + std::cout << "Enter employee record. "; + std::string input; + std::getline(std::cin, input); + + static_assert(std::is_aggregate_v<std::decay_t<employee &>>); + + auto const result = bp::parse(input, employee_p, bp::ws); + + if (result) { + std::cout << "You entered:\nage: " << result->age + << "\nsurname: " << result->surname + << "\nforename: " << result->forename + << "\nsalary : " << result->salary << "\n"; + } else { + std::cout << "Parse failure.\n"; + } +} ++
+
+
+ Just as you can pass a struct
+ as an out-param to parse() when the parser's attribute type is a tuple,
+ you can also pass a tuple as an out-param to parse() when the parser's attribute type is a struct:
+
// Using the employee_p rule from above, with attribute type employee...
+boost::parser::tuple<int, std::string, std::string, double> tup;
+auto const result = bp::parse(input, employee_p, bp::ws, tup); // Ok!
+
+![]() |
+Important | +
|---|---|
+ This automatic use of |
class types as attributes
+ + Many times you don't have an aggregate struct that you want to produce from + your parse. It would be even nicer than the aggregate code above if Boost.Parser + could detect that the members of a tuple that is produced as an attribute + are usable as the arguments to some type's constructor. So, Boost.Parser + does that. +
++
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter a string followed by two unsigned integers. "; + std::string input; + std::getline(std::cin, input); + + constexpr auto string_uint_uint = + bp::lexeme[+(bp::char_ - ' ')] >> bp::uint_ >> bp::uint_; + std::string string_from_parse; + if (parse(input, string_uint_uint, bp::ws, string_from_parse)) + std::cout << "That yields this string: " << string_from_parse << "\n"; + else + std::cout << "Parse failure.\n"; + + std::cout << "Enter an unsigned integer followed by a string. "; + std::getline(std::cin, input); + std::cout << input << "\n"; + + constexpr auto uint_string = bp::uint_ >> +bp::char_; + std::vector<std::string> vector_from_parse; + if (parse(input, uint_string, bp::ws, vector_from_parse)) { + std::cout << "That yields this vector of strings:\n"; + for (auto && str : vector_from_parse) { + std::cout << " '" << str << "'\n"; + } + } else { + std::cout << "Parse failure.\n"; + } +} ++
+
++ Let's look at the first parse. +
++
+constexpr auto string_uint_uint = + bp::lexeme[+(bp::char_ - ' ')] >> bp::uint_ >> bp::uint_; +std::string string_from_parse; +if (parse(input, string_uint_uint, bp::ws, string_from_parse)) + std::cout << "That yields this string: " << string_from_parse << "\n"; +else + std::cout << "Parse failure.\n"; ++
+
+
+ Here, we use the parser string_uint_uint,
+ which produces a boost::parser::tuple<std::string, unsigned int, unsigned
+ int>
+ attribute. When we try to parse that into an out-param std::string
+ attribute, it just works. This is because std::string
+ has a constructor that takes a std::string,
+ an offset, and a length. Here's the other parse:
+
+
+constexpr auto uint_string = bp::uint_ >> +bp::char_; +std::vector<std::string> vector_from_parse; +if (parse(input, uint_string, bp::ws, vector_from_parse)) { + std::cout << "That yields this vector of strings:\n"; + for (auto && str : vector_from_parse) { + std::cout << " '" << str << "'\n"; + } +} else { + std::cout << "Parse failure.\n"; +} ++
+
+
+ Now we have the parser uint_string,
+ which produces a boost::parser::tuple<unsigned int, std::string>
+ attribute — the chars matched by +bp::char_
+ at the end combine into a std::string.
+ Those two values can be used to construct a std::vector<std::string>, via the count, T
+ constructor.
+
+ Just like with using aggregates in place of tuples, non-aggregate class types can be substituted for tuples
+ in most places. That includes using a non-aggregate class
+ type as the attribute type of a rule.
+
+ However, while compatible tuples can be substituted for aggregates, you
+ can't substitute a tuple for some class type T
+ just because the tuple could have been used to construct T.
+ Think of trying to invert the substitution in the second parse above. Converting
+ a std::vector<std::string> into a boost::parser::tuple<unsigned int, std::string>
+ makes no sense.
+
+ It is very common to need to parse quoted strings. Quoted strings are slightly
+ tricky, though, when using a skipper (and you should be using a skipper 99%
+ of the time). You don't want to allow arbitrary whitespace in the middle
+ of your strings, and you also don't want to remove all whitespace from your
+ strings. Both of these things will happen with the typical skipper, ws.
+
+ So, here is how most people would write a quoted string parser: +
+namespace bp = boost::parser; +const auto string = bp::lexeme['"' >> *(bp::char_ - '"') > '"']; ++
+ Some things to note: +
+lexeme[] disables skipping in the
+ parser, and it must be written around the quotes, not around the operator*
+ expression; and
+ + This is a very common pattern. I have written a quoted string parser like + this dozens of times. The parser above is the quick-and-dirty version. A + more robust version would be able to handle escaped quotes within the string, + and then would immediately also need to support escaped escape characters. +
+
+ Boost.Parser provides quoted_string to use in place
+ of this very common pattern. It supports quote- and escaped-character-escaping,
+ using backslash as the escape character.
+
+
+namespace bp = boost::parser; + +auto result1 = bp::parse("\"some text\"", bp::quoted_string, bp::ws); +assert(result1); +std::cout << *result1 << "\n"; // Prints: some text + +auto result2 = + bp::parse("\"some \\\"text\\\"\"", bp::quoted_string, bp::ws); +assert(result2); +std::cout << *result2 << "\n"; // Prints: some "text" ++
+
+
+ As common as this use case is, there are very similar use cases that it does
+ not cover. So, quoted_string has some options.
+ If you call it with a single character, it returns a quoted_string that uses that
+ single character as the quote-character.
+
+
+auto result3 = bp::parse("!some text!", bp::quoted_string('!'), bp::ws); +assert(result3); +std::cout << *result3 << "\n"; // Prints: some text ++
+
+
+ You can also supply a range of characters. One of the characters from the
+ range must quote both ends of the string; mismatches are not allowed. Think
+ of how Python allows you to quote a string with either '"'
+ or '\'', but the same character
+ must be used on both sides.
+
+
+auto result4 = bp::parse("'some text'", bp::quoted_string("'\""), bp::ws); +assert(result4); +std::cout << *result4 << "\n"; // Prints: some text ++
+
+
+ Another common thing to do in a quoted string parser is to recognize escape
+ sequences. If you have simple escape sequences that do not require any real
+ parsing, like say the simple escape sequences from C++, you can provide a
+ symbols
+ object as well. The template parameter T
+ to symbols<T>
+ must be char or char32_t. You don't need to include the escaped
+ backslash or the escaped quote character, since those always work.
+
+
+// the c++ simple escapes +bp::symbols<char> const escapes = { + {"'", '\''}, + {"?", '\?'}, + {"a", '\a'}, + {"b", '\b'}, + {"f", '\f'}, + {"n", '\n'}, + {"r", '\r'}, + {"t", '\t'}, + {"v", '\v'}}; +auto result5 = + bp::parse("\"some text\r\"", bp::quoted_string('"', escapes), bp::ws); +assert(result5); +std::cout << *result5 << "\n"; // Prints (with a CRLF newline): some text ++
+
++ So far we've seen examples that parse some text and generate associated attributes. + Sometimes, you want to find some subrange of the input that contains what + you're looking for, and you don't want to generate attributes at all. +
+
+ There are two directives that affect the attribute type
+ of any parser, raw[] and string_view[].
+ (We'll get to directives in more detail in the Directives
+ section later. For now, you just need to know that a directive wraps a parser,
+ and changes some aspect of how it functions.)
+
+ raw[] changes the attribute of its
+ parser to be a subrange
+ whose begin()
+ and end()
+ return the bounds of the part of the sequence being parsed that is matched by that parser.
+
namespace bp = boost::parser; +auto int_parser = bp::int_ % ','; // ATTR(int_parser) is std::vector<int> +auto subrange_parser = bp::raw[int_parser]; // ATTR(subrange_parser) is a subrange + +// Parse using int_parser, generating integers. +auto ints = bp::parse("1, 2, 3, 4", int_parser, bp::ws); +assert(ints); +assert(*ints == std::vector<int>({1, 2, 3, 4})); + +// Parse again using int_parser, but this time generating only the +// subrange matched by int_parser. (prefix_parse() allows matches that +// don't consume the entire input.) +auto const str = std::string("1, 2, 3, 4, a, b, c"); +auto first = str.begin(); +auto range = bp::prefix_parse(first, str.end(), subrange_parser, bp::ws); +assert(range); +assert(range->begin() == str.begin()); +assert(range->end() == str.begin() + 10); + +static_assert(std::is_same_v< + decltype(range), + std::optional<bp::subrange<std::string::const_iterator>>>); ++
+ Note that the subrange
+ has the iterator type std::string::const_iterator,
+ because that's the iterator type passed to prefix_parse().
+ If we had passed char const
+ * iterators to prefix_parse(),
+ that would have been the iterator type. The only exception to this comes
+ from Unicode-aware parsing (see Unicode
+ Support). In some of those cases, the iterator being used in the parse
+ is not the one you passed. For instance, if you call prefix_parse()
+ with char8_t *
+ iterators, it will create a UTF-8 to UTF-32 transcoding view, and parse the
+ iterators of that view. In such a case, you'll get a subrange whose iterator type
+ is a transcoding iterator. When that happens, you can get the underlying
+ iterator — the one you passed to prefix_parse()
+ — by calling the .base() member function on each transcoding iterator
+ in the returned subrange.
+
auto const u8str = std::u8string(u8"1, 2, 3, 4, a, b, c"); +auto u8first = u8str.begin(); +auto u8range = bp::prefix_parse(u8first, u8str.end(), subrange_parser, bp::ws); +assert(u8range); +assert(u8range->begin().base() == u8str.begin()); +assert(u8range->end().base() == u8str.begin() + 10); ++
+ string_view[] has very similar semantics
+ to raw[], except that it produces a
+ std::basic_string_view<CharT>
+ (where CharT is the type
+ of the underlying range being parsed) instead of a subrange. For this to work,
+ the underlying range must be contiguous. Contiguity of iterators is not detectable
+ before C++20, so this directive is only available in C++20 and later.
+
namespace bp = boost::parser; +auto int_parser = bp::int_ % ','; // ATTR(int_parser) is std::vector<int> +auto sv_parser = bp::string_view[int_parser]; // ATTR(sv_parser) is a string_view + +auto const str = std::string("1, 2, 3, 4, a, b, c"); +auto first = str.begin(); +auto sv1 = bp::prefix_parse(first, str.end(), sv_parser, bp::ws); +assert(sv1); +assert(*sv1 == str.substr(0, 10)); + +static_assert(std::is_same_v<decltype(sv1), std::optional<std::string_view>>); ++
+ Since string_view[] produces string_views,
+ it cannot return transcoding iterators as described above for raw[]. If you parse a sequence of
+ CharT with string_view[],
+ you get exactly a std::basic_string_view<CharT>.
+ If the parse is using transcoding in the Unicode-aware path, string_view[] will decompose the transcoding
+ iterator as necessary. If you pass a transcoding view to parse()
+ or transcoding iterators to prefix_parse(),
+ string_view[] will still see through the
+ transcoding iterators without issue, and give you a string_view
+ of part of the underlying range.
+
auto sv2 = bp::parse("1, 2, 3, 4" | bp::as_utf32, sv_parser, bp::ws); +assert(sv2); +assert(*sv2 == "1, 2, 3, 4"); + +static_assert(std::is_same_v<decltype(sv2), std::optional<std::string_view>>); ++
+ This example is very similar to the others we've seen so far. This one is
+ different only because it uses a rule. As an analogy, think
+ of a parser like char_
+ or double_
+ as an individual line of code, and a rule as a function. Like a
+ function, a rule
+ has its own name, and can even be forward declared. Here is how we define
+ a rule,
+ which is analogous to forward declaring a function:
+
+
+bp::rule<struct doubles, std::vector<double>> doubles = "doubles"; ++
+
+
+ This declares the rule itself. The rule is a parser, and we can
+ immediately use it in other parsers. That definition is pretty dense; take
+ note of these things:
+
struct
+ doubles. Here we've declared
+ the tag type and used it all in one go; you can also use a previously
+ declared tag type.
+ doubles.
+ doubles the
+ diagnostic text "doubles"
+ so that Boost.Parser knows how to refer to it when producing a trace
+ of the parser during debugging.
+
+ Ok, so if doubles is a parser,
+ what does it do? We define the rule's behavior by defining a separate parser
+ that by now should look pretty familiar:
+
+
+auto const doubles_def = bp::double_ % ','; ++
+
+
+ This is analogous to writing a definition for a forward-declared function.
+ Note that we used the name doubles_def.
+ Right now, the doubles rule
+ parser and the doubles_def
+ non-rule parser have no connection to each other. That's intentional —
+ we want to be able to define them separately. To connect them, we declare
+ functions with an interface that Boost.Parser understands, and use the tag
+ type struct doubles
+ to connect them together. We use a macro for that:
+
+
+BOOST_PARSER_DEFINE_RULES(doubles); ++
+
+
+ This macro expands to the code necessary to make the rule doubles
+ and its parser doubles_def
+ work together. The _def suffix
+ is a naming convention that this macro relies on to work. The tag type allows
+ the rule parser, doubles,
+ to call one of these overloads when used as a parser.
+
+ BOOST_PARSER_DEFINE_RULES
+ expands to two overloads of a function called parse_rule(). In the case above, the overloads each
+ take a struct doubles
+ parameter (to distinguish them from the other overloads of parse_rule()
+ for other rules) and parse using doubles_def.
+ You will never need to call any overload of parse_rule() yourself; it is used internally by the
+ parser that implements rules, rule_parser.
+
+ Here is the definition of the macro that is expanded for each rule: +
++
+#define BOOST_PARSER_DEFINE_IMPL(_, rule_name_) \ + template< \ + typename Iter, \ + typename Sentinel, \ + typename Context, \ + typename SkipParser> \ + decltype(rule_name_)::parser_type::attr_type parse_rule( \ + decltype(rule_name_)::parser_type::tag_type *, \ + Iter & first, \ + Sentinel last, \ + Context const & context, \ + SkipParser const & skip, \ + boost::parser::detail::flags flags, \ + bool & success, \ + bool & dont_assign) \ + { \ + auto const & parser = BOOST_PARSER_PP_CAT(rule_name_, _def); \ + using attr_t = \ + decltype(parser(first, last, context, skip, flags, success)); \ + using attr_type = decltype(rule_name_)::parser_type::attr_type; \ + if constexpr (boost::parser::detail::is_nope_v<attr_t>) { \ + dont_assign = true; \ + parser(first, last, context, skip, flags, success); \ + return {}; \ + } else if constexpr (std::is_same_v<attr_type, attr_t>) { \ + return parser(first, last, context, skip, flags, success); \ + } else if constexpr (std::is_constructible_v<attr_type, attr_t>) { \ + return attr_type( \ + parser(first, last, context, skip, flags, success)); \ + } else { \ + attr_type attr{}; \ + parser(first, last, context, skip, flags, success, attr); \ + return attr; \ + } \ + } \ + \ + template< \ + typename Iter, \ + typename Sentinel, \ + typename Context, \ + typename SkipParser, \ + typename Attribute> \ + void parse_rule( \ + decltype(rule_name_)::parser_type::tag_type *, \ + Iter & first, \ + Sentinel last, \ + Context const & context, \ + SkipParser const & skip, \ + boost::parser::detail::flags flags, \ + bool & success, \ + bool & dont_assign, \ + Attribute & retval) \ + { \ + auto const & parser = BOOST_PARSER_PP_CAT(rule_name_, _def); \ + using attr_t = \ + decltype(parser(first, last, context, skip, flags, success)); \ + if constexpr (boost::parser::detail::is_nope_v<attr_t>) { \ + parser(first, last, context, skip, flags, success); \ + } else { \ + parser(first, last, context, skip, flags, success, retval); \ + } \ + } ++
+
+
+ Now that we have the doubles
+ parser, we can use it like we might any other parser:
+
+
+auto const result = bp::parse(input, doubles, bp::ws); ++
+
++ The full program: +
++
+#include <boost/parser/parser.hpp> + +#include <deque> +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + + +bp::rule<struct doubles, std::vector<double>> doubles = "doubles"; +auto const doubles_def = bp::double_ % ','; +BOOST_PARSER_DEFINE_RULES(doubles); + +int main() +{ + std::cout << "Please enter a list of doubles, separated by commas. "; + std::string input; + std::getline(std::cin, input); + + auto const result = bp::parse(input, doubles, bp::ws); + + if (result) { + std::cout << "You entered:\n"; + for (double x : *result) { + std::cout << x << "\n"; + } + } else { + std::cout << "Parse failure.\n"; + } +} ++
+
+
+ All this is intended to introduce the notion of rules. It still may be a bit
+ unclear why you would want to use rules. The use cases for, and
+ lots of detail about, rules are in a later section,
+ More About Rules.
+
+ Like all parsing systems (lex & yacc, Boost.Spirit,
+ etc.), Boost.Parser has a mechanism for associating semantic actions with
+ different parts of the parse. Here is nearly the same program as we saw in
+ the previous example, except that it is implemented in terms of a semantic
+ action that appends each parsed double
+ to a result, instead of automatically building and returning the result.
+ To do this, we replace the double_ from the previous
+ example with double_[action];
+ action is our semantic action:
+
+
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter a list of doubles, separated by commas. "; + std::string input; + std::getline(std::cin, input); + + std::vector<double> result; + auto const action = [&result](auto & ctx) { + std::cout << "Got one!\n"; + result.push_back(_attr(ctx)); + }; + auto const action_parser = bp::double_[action]; + auto const success = bp::parse(input, action_parser % ',', bp::ws); + + if (success) { + std::cout << "You entered:\n"; + for (double x : result) { + std::cout << x << "\n"; + } + } else { + std::cout << "Parse failure.\n"; + } +} ++
+
++ Run in a shell, it looks like this: +
+$ example/semantic_actions +Enter a list of doubles, separated by commas. 4,3 +Got one! +Got one! +You entered: +4 +3 ++
+ In Boost.Parser, semantic actions are implemented in terms of invocable objects + that take a single parameter to a parse-context object. The parse-context + object represents the current state of the parse. In the example we used + this lambda as our invocable: +
++
+auto const action = [&result](auto & ctx) { + std::cout << "Got one!\n"; + result.push_back(_attr(ctx)); +}; ++
+
+
+ We're both printing a message to std::cout
+ and recording a parsed result in the lambda. It could do both, either, or
+ neither of these things if you like. The way we get the parsed double in the lambda is by asking the parse
+ context for it. _attr(ctx) is
+ how you ask the parse context for the attribute produced by the parser to
+ which the semantic action is attached. There are lots of functions like
+ _attr()
+ that can be used to access the state in the parse context. We'll cover more
+ of them later on. The
+ Parse Context defines what exactly the parse context is and how it
+ works.
+
+ Note that you can't write an unadorned lambda directly as a semantic action.
+ Otherwise, the compiler will see two '['
+ characters and think it's about to parse an attribute. Parentheses fix this:
+
p[([](auto & ctx){/*...*/})] ++
+ Before you do this, note that the lambdas that you write as semantic actions
+ are almost always generic (having an auto
+ & ctx
+ parameter), and so are very frequently re-usable. Most semantic action lambdas
+ you write should be written out-of-line, and given a good name. Even when
+ they are not reused, named lambdas keep your parsers smaller and easier to
+ read.
+
![]() |
+Important | +
|---|---|
+ Attaching a semantic action to a parser removes its attribute. That is,
+ |
+ There are some other forms for semantic actions, when they are used inside
+ of rules.
+ See More About Rules
+ for details.
+
+ When writing a parser, it often comes up that there is a set of strings that, + when parsed, are associated with a set of values one-to-one. It is tedious + to write parsers that recognize all the possible input strings when you have + to associate each one with an attribute via a semantic action. Instead, we + can use a symbol table. +
++ Say we want to parse Roman numerals, one of the most common work-related + parsing problems. We want to recognize numbers that start with any number + of "M"s, representing thousands, followed by the hundreds, the + tens, and the ones. Any of these may be absent from the input, but not all. + Here are three symbol Boost.Parser tables that we can use to recognize ones, + tens, and hundreds values, respectively: +
++
+bp::symbols<int> const ones = { + {"I", 1}, + {"II", 2}, + {"III", 3}, + {"IV", 4}, + {"V", 5}, + {"VI", 6}, + {"VII", 7}, + {"VIII", 8}, + {"IX", 9}}; + +bp::symbols<int> const tens = { + {"X", 10}, + {"XX", 20}, + {"XXX", 30}, + {"XL", 40}, + {"L", 50}, + {"LX", 60}, + {"LXX", 70}, + {"LXXX", 80}, + {"XC", 90}}; + +bp::symbols<int> const hundreds = { + {"C", 100}, + {"CC", 200}, + {"CCC", 300}, + {"CD", 400}, + {"D", 500}, + {"DC", 600}, + {"DCC", 700}, + {"DCCC", 800}, + {"CM", 900}}; ++
+
+
+ A symbols
+ maps strings of char to their
+ associated attributes. The type of the attribute must be specified as a template
+ parameter to symbols
+ — in this case, int.
+
+ Any "M"s we encounter should add 1000 to the result, and all other + values come from the symbol tables. Here are the semantic actions we'll need + to do that: +
++
+int result = 0; +auto const add_1000 = [&result](auto & ctx) { result += 1000; }; +auto const add = [&result](auto & ctx) { result += _attr(ctx); }; ++
+
+
+ add_1000 just adds 1000 to result.
+ add adds whatever attribute
+ is produced by its parser to result.
+
+ Now we just need to put the pieces together to make a parser: +
++
+using namespace bp::literals; +auto const parser = + *'M'_l[add_1000] >> -hundreds[add] >> -tens[add] >> -ones[add]; ++
+
+
+ We've got a few new bits in play here, so let's break it down. 'M'_l is a
+ literal parser. That is, it is a parser that parses
+ a literal char, code point,
+ or string. In this case, a char
+ 'M' is being parsed. The _l bit at the end is a UDL
+ suffix that you can put after any char,
+ char32_t, or char
+ const *
+ to form a literal parser. You can also make a literal parser by writing
+ lit(), passing an argument of
+ one of the previously mentioned types.
+
+ Why do we need any of this, considering that we just used a literal ',' in our previous example? The reason is that
+ 'M' is not used in an expression
+ with another Boost.Parser parser. It is used within *'M'_l[add_1000].
+ If we'd written *'M'[add_1000], clearly that would be ill-formed; char has no operator*, nor an operator[], associated with it.
+
![]() |
+Tip | +
|---|---|
+ Any time you want to use a |
+ On to the next bit: -hundreds[add].
+ By now, the use of the index operator should be pretty familiar; it associates
+ the semantic action add with
+ the parser hundreds. The
+ operator-
+ at the beginning is new. It means that the parser it is applied to is optional.
+ You can read it as "zero or one". So, if hundreds
+ is not successfully parsed after *'M'_l[add_1000], nothing happens, because hundreds is allowed to be missing —
+ it's optional. If hundreds
+ is parsed successfully, say by matching "CC",
+ the resulting attribute, 200,
+ is added to result inside
+ add.
+
+ Here is the full listing of the program. Notice that it would have been inappropriate + to use a whitespace skipper here, since the entire parse is a single number, + so it was removed. +
++
+#include <boost/parser/parser.hpp> + +#include <iostream> +#include <string> + + +namespace bp = boost::parser; + +int main() +{ + std::cout << "Enter a number using Roman numerals. "; + std::string input; + std::getline(std::cin, input); + + bp::symbols<int> const ones = { + {"I", 1}, + {"II", 2}, + {"III", 3}, + {"IV", 4}, + {"V", 5}, + {"VI", 6}, + {"VII", 7}, + {"VIII", 8}, + {"IX", 9}}; + + bp::symbols<int> const tens = { + {"X", 10}, + {"XX", 20}, + {"XXX", 30}, + {"XL", 40}, + {"L", 50}, + {"LX", 60}, + {"LXX", 70}, + {"LXXX", 80}, + {"XC", 90}}; + + bp::symbols<int> const hundreds = { + {"C", 100}, + {"CC", 200}, + {"CCC", 300}, + {"CD", 400}, + {"D", 500}, + {"DC", 600}, + {"DCC", 700}, + {"DCCC", 800}, + {"CM", 900}}; + + int result = 0; + auto const add_1000 = [&result](auto & ctx) { result += 1000; }; + auto const add = [&result](auto & ctx) { result += _attr(ctx); }; + + using namespace bp::literals; + auto const parser = + *'M'_l[add_1000] >> -hundreds[add] >> -tens[add] >> -ones[add]; + + if (bp::parse(input, parser) && result != 0) + std::cout << "That's " << result << " in Arabic numerals.\n"; + else + std::cout << "That's not a Roman number.\n"; +} ++
+
+![]() |
+Important | +
|---|---|
+ |
+ Just like with a rule,
+ you can give a symbols
+ a bit of diagnostic text that will be used in error messages generated by
+ Boost.Parser when the parse fails at an expectation point, as described in
+ Error
+ Handling and Debugging. See the symbols constructors for details.
+
+ First, let's cover some terminology that we'll be using throughout the docs: +
++ A semantic action is an arbitrary bit of logic associated + with a parser, that is only executed when the parser matches. +
+
+ Simpler parsers can be combined to form more complex parsers. Given some
+ combining operation C, and
+ parsers P0, P1, ... PN,
+ C(P0, P1, ... PN) creates a new parser Q.
+ This creates a parse tree. Q
+ is the parent of P1, P2 is the child of Q,
+ etc. The parsers are applied in the top-down fashion implied by this topology.
+ When you use Q to parse a
+ string, it will use P0,
+ P1, etc. to do the actual
+ work. If P3 is being used
+ to parse the input, that means that Q
+ is as well, since the way Q
+ parses is by dispatching to its children to do some or all of the work. At
+ any point in the parse, there will be exactly one parser without children
+ that is being used to parse the input; all other parsers being used are its
+ ancestors in the parse tree.
+
+ A subparser is a parser that is the child of another + parser. +
++ The top-level parser is the root of the tree of parsers. +
++ The current parser or bottommost parser + is the parser with no children that is currently being used to parse the + input. +
++ A rule is a kind of parser that makes building large, + complex parsers easier. A subrule is a rule that is + the child of some other rule. The current rule or bottommost + rule is the one rule currently being used to parse the input that + has no subrules. Note that while there is always exactly one current parser, + there may or may not be a current rule — rules are one kind of parser, + and you may or may not be using one at a given point in the parse. +
+
+ The top-level parse is the parse operation being performed
+ by the top-level parser. This term is necessary because, though most parse
+ failures are local to a particular parser, some parse failures cause the
+ call to parse() to indicate failure of the
+ entire parse. For these cases, we say that such a local failure "causes
+ the top-level parse to fail".
+
+ Throughout the Boost.Parser documentation, I will refer to "the call
+ to parse()". Read this as "the
+ call to any one of the functions described in The
+ parse()
+ API". That includes prefix_parse(),
+ callback_parse(), and callback_prefix_parse().
+
+ There are some special kinds of parsers that come up often in this documentation. +
+
+ One is a sequence parser; you will see it created using
+ operator>>,
+ as in p1 >>
+ p2 >>
+ p3. A sequence parser tries to
+ match all of its subparsers to the input, one at a time, in order. It matches
+ the input iff all its subparsers do.
+
+ Another is an alternative parser; you will see it created
+ using operator|,
+ as in p1 |
+ p2 |
+ p3. An alternative parser tries
+ to match all of its subparsers to the input, one at a time, in order; it
+ stops after matching at most one subparser. It matches the input iff one
+ of its subparsers does.
+
+ Finally, there is a permutation parser; it is created
+ using operator||,
+ as in p1 ||
+ p2 ||
+ p3. A permutation parser tries
+ to match all of its subparsers to the input, in any order. So the parser
+ p1 ||
+ p2 ||
+ p3 is equivalent to (p1 >>
+ p2 >>
+ p3) | (p1
+ >> p3
+ >> p2) | (p2 >> p1 >> p3) |
+ (p2 >> p3 >> p1) | (p3 >> p1 >> p2) |
+ (p3 >> p2 >> p1). Hopefully its terseness is self-explanatory.
+ It matches the input iff all of its subparsers do, regardless of the order
+ they match in.
+
+ Boost.Parser parsers each have an attribute associated
+ with them, or explicitly have no attribute. An attribute is a value that
+ the parser generates when it matches the input. For instance, the parser
+ double_
+ generates a double when it matches
+ the input. ATTR()
+ is a notional macro that expands to the attribute type of the parser passed
+ to it; is ATTR(double_)double.
+ This is similar to the attribute type trait.
+
+ Token parsing is parsing using Boost.Parser's optional + support for lexing/tokenizing first, and parsing the resulting tokens, as + opposed to the normal operation of Boost.Parser, in which input characters + are parsed. +
++ Next, we'll look at some simple programs that parse using Boost.Parser. We'll + start small and build up from there. +
++ There are multiple top-level parse functions. They have some things in common: +
+bool.
+ char,
+ wchar_t, char8_t,
+ char16_t, or char32_t.
+ prefix_
+ in their name take an iterator/sentinel pair. For example prefix_parse(first, last, p, ws),
+ which parses the range [first, last),
+ advancing first as it
+ goes. If the parse succeeds, the entire input may or may not have been
+ matched. The value of first
+ will indicate the last location within the input that p
+ matched. The whole input was matched
+ if and only if first == last
+ after the call to parse().
+ parse(),
+ for example parse(r, p, ws), parse()
+ only indicates success if all of r was matched by p.
+ ![]() |
+Note | +
|---|---|
+ |
+ There are eight overloads of parse()
+ and prefix_parse() combined, because there
+ are three either/or options in how you call them.
+
+ You can call prefix_parse()
+ with an iterator and sentinel that delimit a range of character values. For
+ example:
+
namespace bp = boost::parser; +auto const p = /* some parser ... */; + +char const * str_1 = /* ... */; +// Using null_sentinel, str_1 can point to three billion characters, and +// we can call prefix_parse() without having to find the end of the string first. +auto result_1 = bp::prefix_parse(str_1, bp::null_sentinel, p, bp::ws); + +char str_2[] = /* ... */; +auto result_2 = bp::prefix_parse(std::begin(str_2), std::end(str_2), p, bp::ws); ++
+ The iterator/sentinel overloads can parse successfully without matching the
+ entire input. You can tell if the entire input was matched by checking if
+ first ==
+ last is true after prefix_parse() returns.
+
+ By contrast, you call parse()
+ with a range of character values. When the range is a reference to an array
+ of characters, any terminating 0
+ is ignored; this allows calls like parse("str",
+ p)
+ to work naturally.
+
namespace bp = boost::parser; +auto const p = /* some parser ... */; + +std::u8string str_1 = "str"; +auto result_1 = bp::parse(str_1, p, bp::ws); + +// The null terminator is ignored. This call parses s-t-r, not s-t-r-0. +auto result_2 = bp::parse(U"str", p, bp::ws); + +char const * str_3 = "str"; +auto result_3 = bp::parse(bp::null_term(str_3) | bp::as_utf16, p, bp::ws); ++
+ Since there is no way to indicate that p
+ matches the input, but only a prefix of the input was matched, the range
+ (non-iterator/sentinel) overloads of parse()
+ indicate failure if the entire input is not matched.
+
namespace bp = boost::parser; +auto const p = '"' >> *(bp::char_ - '"') >> '"'; +char const * str = "\"two words\"" ; + +std::string result_1; +bool const success = bp::parse(str, p, result_1); // success is true; result_1 is "two words" +auto result_2 = bp::parse(str, p); // !!result_2 is true; *result_2 is "two words" ++
+ When you call parse() with
+ an attribute out-parameter and parser p,
+ the expected type is something like .
+ It doesn't have to be exactly that; I'll explain in a bit. The return type
+ is ATTR(p)bool.
+
+ When you call parse() without
+ an attribute out-parameter and parser p,
+ the return type is std::optional<.
+ Note that when ATTR(p)>
+ is itself an ATTR(p)optional, the
+ return type is std::optional<std::optional<...>>. Each of those optionals tells
+ you something different. The outer one tells you whether the parse succeeded.
+ If so, the parser was successful, but it still generates an attribute that
+ is an optional — that's
+ the inner one.
+
namespace bp = boost::parser; +auto const p = '"' >> *(bp::char_ - '"') >> '"'; +char const * str = "\"two words\"" ; + +auto result_1 = bp::parse(str, p); // !!result_1 is true; *result_1 is "two words" +auto result_2 = bp::parse(str, p, bp::ws); // !!result_2 is true; *result_2 is "twowords" ++
+ For any call to parse() that takes an attribute
+ out-parameter, like parse("str",
+ p, bp::ws, out),
+ the call is well-formed for a number of possible types of out;
+ decltype(out) does
+ not need to be exactly .
+ ATTR(p)
+ For instance, this is well-formed code that does not abort (remember that
+ the attribute type of string()
+ is std::string):
+
namespace bp = boost::parser; +auto const p = bp::string("foo"); + +std::vector<char> result; +bool const success = bp::parse("foo", p, result); +assert(success && result == std::vector<char>({'f', 'o', 'o'})); ++
+ Even though p generates a
+ std::string attribute, when it actually takes
+ the data it generates and writes it into an attribute, it only assumes that
+ the attribute is a container
+ (see Concepts), not that it
+ is some particular container type. It will happily insert() into a std::string
+ or a std::vector<char> all
+ the same. std::string and std::vector<char>
+ are both containers of char,
+ but it will also insert into a container with a different element type.
+ p just needs to be able to
+ insert the elements it produces into the attribute-container. As long as
+ an implicit conversion allows that to work, everything is fine:
+
namespace bp = boost::parser; +auto const p = bp::string("foo"); + +std::deque<int> result; +bool const success = bp::parse("foo", p, result); +assert(success && result == std::deque<int>({'f', 'o', 'o'})); ++
+ This works, too, even though it requires inserting elements from a generated
+ sequence of char32_t into a
+ container of char (remember
+ that the attribute type of +cp
+ is std::vector<char32_t>):
+
namespace bp = boost::parser; +auto const p = +bp::cp; + +std::string result; +bool const success = bp::parse("foo", p, result); +assert(success && result == "foo"); ++
+ This next example works as well, even though the change to a container is + not at the top level. It is an element of the result tuple: +
+namespace bp = boost::parser; +auto const p = +(bp::cp - ' ') >> ' ' >> string("foo"); + +using attr_type = decltype(bp::parse(u8"", p)); +static_assert(std::is_same_v< + attr_type, + std::optional<bp::tuple<std::string, std::string>>>); + +using namespace bp::literals; + +{ + // This is similar to attr_type, with the first std::string changed to a std::vector<int>. + bp::tuple<std::vector<int>, std::string> result; + bool const success = bp::parse(u8"rôle foo" | bp::as_utf8, p, result); + assert(success); + assert(bp::get(result, 0_c) == std::vector<int>({'r', U'ô', 'l', 'e'})); + assert(bp::get(result, 1_c) == "foo"); +} +{ + // This time, we have a std::vector<char> instead of a std::vector<int>. + bp::tuple<std::vector<char>, std::string> result; + bool const success = bp::parse(u8"rôle foo" | bp::as_utf8, p, result); + assert(success); + // The 4 code points "rôle" get transcoded to 5 UTF-8 code points to fit in the std::string. + assert(bp::get(result, 0_c) == std::vector<char>({'r', (char)0xc3, (char)0xb4, 'l', 'e'})); + assert(bp::get(result, 1_c) == "foo"); +} ++
+ As indicated in the inline comments, there are a couple of things to take + away from this example: +
+std::string
+ to std::vector<int>,
+ or std::vector<char32_t>
+ to std::deque<int>),
+ the call to parse() will often still be
+ well-formed.
+ char32_t
+ (or wchar_t for non-MSVC
+ builds), and the new container's element type is char
+ or char8_t, Boost.Parser
+ assumes that this is a UTF-32-to-UTF-8 conversion, and silently transcodes
+ the data when inserting into the new container.
+ + Let's look at a case where another simple-seeming type replacement does + not work. First, the case that works: +
+namespace bp = boost::parser; +auto parser = -(bp::char_ % ','); +std::vector<int> result; +auto b = bp::parse("a, b", parser, bp::ws, result); ++
+
+ is ATTR(parser)std::optional<std::string>. Even though we pass a std::vector<int>,
+ everything is fine. However, if we modify this case only slightly, so that
+ the std::optional<std::string> is nested within the attribute, the code
+ becomes ill-formed.
+
struct S +{ + std::vector<int> chars; + int i; +}; +namespace bp = boost::parser; +auto parser = -(bp::char_ % ',') >> bp::int_; +S result; +auto b = bp::parse("a, b 42", parser, bp::ws, result); ++
+ If we change chars to a
+ std::vector<char>,
+ the code is still ill-formed. Same if we change chars
+ to a std::string. We must actually use std::optional<std::string> exactly to make the code well-formed
+ again.
+
+ The reason the same looseness from the top-level parser does not apply to
+ a nested parser is that, at some point in the code, the parser -(bp::char_ % ',') would try
+ to assign a std::optional<std::string> — the element type of the attribute
+ type it normally generates — to a chars.
+ If there's no implicit conversion there, the code is ill-formed.
+
+ The take-away for this last example is that the ability to arbitrarily swap
+ out data types within the type of the attribute you pass to parse() is very flexible, but is
+ also limited to structurally simple cases. When we discuss rules in the next section,
+ we'll see how this flexibility in the types of attributes can help when writing
+ complicated parsers.
+
+ Those were examples of swapping out one container type for another. They
+ make good examples because that is more likely to be surprising, and so it's
+ getting lots of coverage here. You can also do much simpler things like parse
+ using a uint_,
+ and write its attribute into a double.
+ In general, you can swap any type T
+ out of the attribute, as long as the swap would not result in some ill-formed
+ assignment within the parse.
+
+ Here is another example that also produces surprising results, for a different + reason. +
+namespace bp = boost::parser; +constexpr auto parser = bp::char_('a') >> bp::char_('b') >> bp::char_('c') | + bp::char_('x') >> bp::char_('y') >> bp::char_('z'); +std::string str = "abc"; +bp::tuple<char, char, char> chars; +bool b = bp::parse(str, parser, chars); +assert(b); +assert(chars == bp::tuple('c', '\0', '\0')); ++
+ This looks wrong, but is expected behavior. At every stage of the parse that
+ produces an attribute, Boost.Parser tries to assign that attribute to some
+ part of the out-param attribute provided to parse(),
+ if there is one. Note that is ATTR(parser)std::string,
+ because each sequence parser is three char_
+ parsers in a row, which forms a std::string;
+ there are two such alternatives, so the overall attribute is also std::string.
+ During the parse, when the first parser bp::char_('a')
+ matches the input, it produces the attribute 'a'
+ and needs to assign it to its destination. Some logic inside the sequence
+ parser indicates that this 'a'
+ contributes to the value in the 0th
+ position in the result tuple, if the result is being written into a tuple.
+ Here, we passed a bp::tuple<char, char, char>,
+ so it writes 'a' into the first
+ element. Each subsequent char_
+ parser does the same thing, and writes over the first element. If we had
+ passed a std::string as the out-param instead, the logic
+ would have seen that the out-param attribute is a string, and would have
+ appended 'a' to it. Then each subsequent
+ parser would have appended to the string.
+
+ Boost.Parser never looks at the arity of the tuple passed to parse() to see if there are too
+ many or too few elements in it, compared to the expected attribute for the
+ parser. In this case, there are two extra elements that are never touched.
+ If there had been too few elements in the tuple, you would have seen a compilation
+ error. The reason that Boost.Parser never does this kind of type-checking
+ up front is that the loose assignment logic is spread out among the individual
+ parsers; the top-level parse can determine what the expected attribute is,
+ but not whether a passed attribute of another type is a suitable stand-in.
+
variant attribute out-parameters
+
+ The use of a variant in an out-param is compatible if the default attribute
+ can be assigned to the variant.
+ No other work is done to make the assignment compatible. For instance, this
+ will work as you'd expect:
+
namespace bp = boost::parser; +std::variant<int, double> v; +auto b = bp::parse("42", bp::int_, v); +assert(b); +assert(v.index() == 0); +assert(std::get<0>(v) == 42); ++
+ Again, this works because v = 42 is well-formed.
+ However, other kinds of substitutions will not work. In particular, the
+ boost::parser::tuple
+ to aggregate or aggregate to boost::parser::tuple transformations will
+ not work. Here's an example.
+
struct key_value +{ + int key; + double value; +}; + +namespace bp = boost::parser; +std::variant<key_value, double> kv_or_d; +key_value kv; +bp::parse("42 13.0", bp::int_ >> bp::double_, kv); // Ok. +bp::parse("42 13.0", bp::int_ >> bp::double_, kv_or_d); // Error: ill-formed! ++
+ In this case, it would be easy for Boost.Parser to look at the alternative + types covered by the variant, and do a conversion. However, there are many + cases in which there is no obviously correct variant alternative type, or + in which the user might expect one variant alternative type and get another. + Consider a couple of cases. +
+struct i_d { int i; double d; }; +struct d_i { double d; int i; }; +using v1 = std::variant<i_d, d_i>; + +struct i_s { int i; short s; }; +struct d_d { double d1; double d2; }; +using v2 = std::variant<i_s, d_d>; + +using tup_t = boost::parser::tuple<short, short>; ++
+ If we have a parser that produces a tup_t,
+ and we have a v1 attribute
+ out-param, the correct variant alternative type clearly does not exist —
+ this case is ambiguous, and anyone can see that neither variant alternative
+ is a better match. If we were assigning a tup_t
+ to v2, it's even worse. The
+ same ambiguity exists, but to the user, i_s
+ is clearly "closer" than d_d.
+
+ So, Boost.Parser only does assignment. If some parser P
+ generates a default attribute that is not assignable to a variant alternative
+ that you want to assign it to, you can just create a rule that creates either an
+ exact variant alternative type, or the variant itself, and use P as your rule's parser.
+
+ A call to parse() either considers the entire
+ input to be in a UTF format (UTF-8, UTF-16, or UTF-32), or it considers the
+ entire input to be in some unknown encoding. Here is how it deduces which
+ case the call falls under:
+
char8_t,
+ or if the input is a boost::parser::utf8_view,
+ the input is UTF-8.
+ char,
+ the input is in an unknown encoding.
+ ![]() |
+Tip | +
|---|---|
+ if you want to want to parse in ASCII-only mode, or in some other non-Unicode
+ encoding, use only sequences of |
![]() |
+Tip | +
|---|---|
+ If you want to ensure all input is parsed as Unicode, pass the input range
+ |
![]() |
+Note | +
|---|---|
+ Since passing |
trace_mode parameter to
+ parse()
+
+ Debugging parsers is notoriously difficult once they reach a certain size.
+ To get a verbose trace of your parse, pass boost::parser::trace::on as the final parameter to parse(). It will show you the current
+ parser being matched, the next few characters to be parsed, and any attributes
+ generated. See the Error
+ Handling and Debugging section of the tutorial for details.
+
+ Each call to parse() can optionally have a globals
+ object associated with it. To use a particular globals object with your parser,
+ you call with_globals() to create a new parser with
+ the globals object in it:
+
struct globals_t +{ + int foo; + std::string bar; +}; +auto const parser = /* ... */; +globals_t globals{42, "yay"}; +auto result = boost::parser::parse("str", boost::parser::with_globals(parser, globals)); ++
+ Every semantic action within that call to parse()
+ can access the same globals_t
+ object using _globals(ctx).
+
+ The default error handler is great for most needs, but if you want to change
+ it, you can do so by creating a new parser with a call to with_error_handler():
+
auto const parser = /* ... */; +my_error_handler error_handler; +auto result = boost::parser::parse("str", boost::parser::with_error_handler(parser, error_handler)); ++
![]() |
+Tip | +
|---|---|
+ If your parsing environment does not allow you to report errors to a terminal,
+ you may want to use |
![]() |
+Important | +
|---|---|
+ Globals and the error handler are ignored, if present, on any parser except + the top-level parser. + |
+ Now would be a good time to describe the parse context in some detail. Any + semantic action that you write will need to use state in the parse context, + so you need to know what's available. +
+
+ The parse context is an object that stores the current state of the parse
+ — the current- and end-iterators, the error handler, etc. Data may
+ seem to be "added" to or "removed" from it at different
+ times during the parse. For instance, when a parser p
+ with a semantic action a
+ succeeds, the context adds the attribute that p
+ produces to the parse context, then calls a,
+ passing it the context.
+
+ Though the context object appears to have things added to or removed from + it, it does not. In reality, there is no one context object. Contexts are + formed at various times during the parse, usually when starting a subparser. + Each context is formed by taking the previous context and adding or changing + members as needed to form a new context object. When the function containing + the new context object returns, its context object (if any) is destructed. + This is efficient to do, because the parse context has only about a dozen + data members, and each data member is less than or equal to the size of a + pointer. Copying the entire context when mutating the context is therefore + fast. The context does no memory allocation. +
+![]() |
+Tip | +
|---|---|
+ All these functions that take the parse context as their first parameter
+ will be found by <a href="http://en.cppreference.com/w/cpp/language/adl" target="_top">Argument-Dependent Lookup</a>. You will probably never
+ need to qualify them with |
+ By convention, the names of all Boost.Parser functions that take a parse + context, and are therefore intended for use inside semantic actions, contain + a leading underscore. +
+
+ _pass() returns a reference to a
+ bool indicating the success
+ or failure of the current parse. This can be used to force the current parse
+ to pass or fail:
+
[](auto & ctx) { + // If the attribute fails to meet this predicate, fail the parse. + if (!necessary_condition(_attr(ctx))) + _pass(ctx) = false; +} ++
+ Note that for a semantic action to be executed, its associated parser must
+ already have succeeded. So unless you previously wrote _pass(ctx)
+ = false
+ within your action, _pass(ctx)
+ = true
+ does nothing; it's redundant.
+
+ _begin() and _end()
+ return the beginning and end of the range that you passed to parse(), respectively. _where() returns a subrange indicating the bounds
+ of the input matched by the current parse. _where()
+ can be useful if you just want to parse some text and return a result consisting
+ of where certain elements are located, without producing any other attributes.
+ _where() can also be essential in
+ tracking where things are located, to provide good diagnostics at a later
+ point in the parse. Think mismatched tags in XML; if you parse a close-tag
+ at the end of an element, and it does not match the open-tag, you want to
+ produce an error message that mentions or shows both tags. Stashing _where(ctx).begin()
+ somewhere that is available to the close-tag parser will enable that. See
+ Error
+ Handling and Debugging for an example of this.
+
+ _error_handler() returns a reference to the
+ error handler associated with the parser passed to parse().
+ Using _error_handler(), you can generate errors
+ and warnings from within your semantic actions. See Error
+ Handling and Debugging for concrete examples.
+
+ _attr() returns a reference to the
+ value of the current parser's attribute. It is available only when the current
+ parser's parse is successful. If the parser has no semantic action, no attribute
+ gets added to the parse context. It can be used to read and write the current
+ parser's attribute:
+
[](auto & ctx) { _attr(ctx) = 3; } ++
+ If the current parser has no attribute, a none is returned.
+
+ _val() returns a reference to the
+ value of the attribute of the current rule being used to parse (if any),
+ and is available even before the rule's parse is successful. It can be used
+ to set the current rule's attribute, even from a parser that is a subparser
+ inside the rule. Let's say we're writing a parser with a semantic action
+ that is within a rule. If we want to set the current rule's value to some
+ function of the subparser's attribute, we would write this semantic action:
+
[](auto & ctx) { _val(ctx) = some_function(_attr(ctx)); } ++
+ If there is no current rule, or the current rule has no attribute, a none
+ is returned.
+
+ You need to use _val() in cases where the default
+ attribute for a rule's
+ parser is not directly compatible with the attribute type of the rule.
+ In these cases, you'll need to write some code like the example above to
+ compute the rule's
+ attribute from the rule's
+ parser's generated attribute. For more info on rules, see the next page, and
+ More About Rules.
+
+ _globals() returns a reference to a
+ user-supplied object that contains whatever data you want to use during the
+ parse. The "globals" for a parse is an object — typically
+ a struct — that you give to the top-level parser. Then you can use
+ _globals() to access it at any time
+ during the parse. We'll see how globals get associated with the top-level
+ parser in The parse()
+ API later. As an example, say that you have an early part of the parse
+ that needs to record some black-listed values, and that later parts of the
+ parse might need to parse values, failing the parse if they see the black-listed
+ values. In the early part of the parse, you could write something like this.
+
[](auto & ctx) { + // black_list is a std::unordered_set. + _globals(ctx).black_list.insert(_attr(ctx)); +} ++
+ Later in the parse, you could then use black_list
+ to check values as they are parsed.
+
[](auto & ctx) { + if (_globals(ctx).black_list.contains(_attr(ctx))) + _pass(ctx) = false; +} ++
+ _locals() returns a reference to one
+ or more values that are local to the current rule being parsed, if any. If
+ there are two or more local values, _locals()
+ returns a reference to a boost::parser::tuple. Rules with locals are
+ something we haven't gotten to yet (see More
+ About Rules), but for now all you need to know is that you can provide
+ a template parameter (LocalState)
+ to rule,
+ and the rule will default construct an object of that type for use within
+ the rule. You access it via _locals():
+
[](auto & ctx) { + auto & local = _locals(ctx); + // Use local here. If 'local' is a hana::tuple, access its members like this: + using namespace hana::literals; + auto & first_element = local[0_c]; + auto & second_element = local[1_c]; +} ++
+ If there is no current rule, or the current rule has no locals, a none
+ is returned.
+
+ _params(), like _locals(),
+ applies to the current rule being used to parse, if any (see More
+ About Rules). It also returns a reference to a single value, if the
+ current rule has only one parameter, or a boost::parser::tuple of multiple values if
+ the current rule has multiple parameters. If there is no current rule, or
+ the current rule has no parameters, a none is returned.
+
+ Unlike with _locals(), you do
+ not provide a template parameter to rule. Instead you call the
+ rule's
+ with()
+ member function (again, see More
+ About Rules).
+
![]() |
+Note | +
|---|---|
+ |
_no_case()
+
+ _no_case() returns true
+ if the current parse context is inside one or more (possibly nested) no_case[] directives. I don't have a
+ use case for this, but if I didn't expose it, it would be the only thing
+ in the context that you could not examine from inside a semantic action.
+ It was easy to add, so I did.
+
+ Boost.Parser comes with all the parsers most parsing tasks will ever need.
+ Each one is a constexpr object,
+ or a constexpr function. Some
+ of the non-functions are also callable, such as char_, which may be used directly,
+ or with arguments, as in char_('a', 'z'). Any parser that can be called, whether
+ a function or callable object, will be called a callable parser
+ from now on. Note that there are no nullary callable parsers; they each take
+ one or more arguments.
+
+ Each callable parser takes one or more parse arguments. + A parse argument may be a value or an invocable object that accepts a reference + to the parse context. The reference parameter may be mutable or constant. + For example: +
+struct get_attribute +{ + template<typename Context> + auto operator()(Context & ctx) + { + return _attr(ctx); + } +}; ++
+ This can also be a lambda. For example: +
+[](auto const & ctx) { return _attr(ctx); } ++
+ The operation that produces a value from a parse argument, which may be a
+ value or a callable taking a parse context argument, is referred to as resolving
+ the parse argument. If a parse argument arg
+ can be called with the current context, then the resolved value of arg is arg(ctx);
+ otherwise, the resolved value is just arg.
+
+ Some callable parsers take a parse predicate. A parse
+ predicate is not quite the same as a parse argument, because it must be a
+ callable object, and cannot be a value. A parse predicate's return type must
+ be contextually convertible to bool.
+ For example:
+
struct equals_three +{ + template<typename Context> + bool operator()(Context const & ctx) + { + return _attr(ctx) == 3; + } +}; ++
+ This may of course be a lambda: +
+[](auto & ctx) { return _attr(ctx) == 3; } ++
+ The notional macro RESOLVE() expands to the result of resolving a parse
+ argument or parse predicate. You'll see it used in the rest of the documentation.
+
+ An example of how parse arguments are used: +
+namespace bp = boost::parser; +// This parser matches one code point that is at least 'a', and at most +// the value of last_char, which comes from the globals. +auto last_char = [](auto & ctx) { return _globals(ctx).last_char; } +auto subparser = bp::char_('a', last_char); ++
+ Don't worry for now about what the globals are for now; the take-away is + that you can make any argument you pass to a parser depend on the current + state of the parse, by using the parse context: +
+namespace bp = boost::parser; +// This parser parses two code points. For the parse to succeed, the +// second one must be >= 'a' and <= the first one. +auto set_last_char = [](auto & ctx) { _globals(ctx).last_char = _attr(x); }; +auto parser = bp::char_[set_last_char] >> subparser; ++
+ Each callable parser returns a new parser, parameterized using the arguments + given in the invocation. +
+
+ This table lists all the Boost.Parser parsers. For the callable parsers,
+ a separate entry exists for each possible arity of arguments. For a parser
+ p, if there is no entry for
+ p without arguments, p is a function, and cannot itself be used
+ as a parser; it must be called. In the table below:
+
char");
+ RESOLVE()
+ is a notional macro that expands to the resolution of parse argument
+ or evaluation of a parse predicate (see The
+ Parsers And Their Uses);
+ RESOLVE(pred) == true"
+ is a shorthand notation for "RESOLVE(pred) is contextually convertible to bool and true";
+ likewise for false;
+ c is a character of type
+ char, char8_t,
+ or char32_t;
+ str is a string literal
+ of type char const[], char8_t
+ const [],
+ or char32_t const
+ [];
+ pred is a parse predicate;
+ arg0, arg1,
+ arg2, ... are parse arguments;
+ a is a semantic action;
+ r is an object whose
+ type models parsable_range;
+ p, p1,
+ p2, ... are parsers;
+ and
+ escapes is a symbols<T> object, where T is char
+ or char32_t.
+ ![]() |
+Note | +
|---|---|
|
+
+ The definition of + +template<typename T> +concept parsable_range = (std::ranges::forward_range<T> && + code_unit<std::ranges::range_value_t<T>>) || + detail::is_tokens_view_v<T>; ++ + + |
![]() |
+Note | +
|---|---|
+ Some of the parsers in this table consume no input. All parsers consume + the input they match unless otherwise stated in the table below. + |
Table 1.6. Parsers and Their Semantics
+|
+ + Parser + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+ + Matches epsilon, the empty string. Always + matches, and consumes no input. + + |
+
+ + None. + + |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Fails to match the input if |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + Matches a single whitespace code point (see note), according to + the Unicode White_Space property. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ properties. |
+
|
+
+ |
+
+ + Matches a single newline (see note), following the "hard" + line breaks in the Unicode line breaking algorithm. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ Line Breaking Algorithm. |
+
|
+
+ |
+
+ + Matches only at the end of input, and consumes no input. + + |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Always matches, and consumes no input. Generates the attribute
+ |
+
+
+ |
+
+
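+ An important use case for <code class="computeroutput"><span class="identifier">attr</span><span class="special">()</span></code> + is to provide a default attribute value as a trailing alternative. + For instance, an <span class="bold"><strong>optional</strong></span> + comma-delimited list is: <code class="computeroutput"><span class="identifier">int_</span> <span class="special">%</span> <span class="char">','</span> <span class="special">|</span> <span class="identifier">attr</span><span class="special">(</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">int</span><span class="special">>)</span></code>. Without the "<code class="computeroutput"><span class="special">|</span> <span class="identifier">attr</span><span class="special">(...)</span></code>", at least one <code class="computeroutput"><a class="link" href="../boost/parser/int_.html" title="Global int_">int_</a></code> match would be required. + </p>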
+
|
+
+ |
+
+ + Matches any single code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
++ + | +
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single control-character code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single decimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single punctuation code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single hexidecimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single lower-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single upper-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a binary unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the binary unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an octal unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the octal unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a hexadecimal unsigned integral value. + + |
+
+
+ |
+
+
+ For example, |
+
|
+
+ |
+
+
+ Matches exactly the hexadecimal unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches an unsigned integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the unsigned integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+ + Matches a signed integral value. + + |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches exactly the signed integral value |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches a floating-point number. |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches a floating-point number. |
+
+
+ |
++ | +
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ |
+
+
+ |
+
+
+ Unlike the other entries in this table, |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the quotes. A quote within the string
+ can be written by escaping it with a backslash. A backslash within
+ the string can be written by writing two consecutive backslashes.
+ Any other use of a backslash will fail the parse. Skipping is disabled
+ while parsing the entire string, as if using |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches some character |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches |
+
+
+ |
+
+
+ The result does not include the |
+
|
+
+ |
+
+
+ Matches some character |
+
+
+ |
+
+
+ The result does not include the |
+
![]() |
+Important | +
|---|---|
+ All the character parsers, like |
![]() |
+Note | +
|---|---|
+ A slightly more complete description of the attributes generated by these + parsers is in a subsequent section. The attributes are repeated here so + you can use see all the properties of the parsers in one place. + |
+ If you have an integral type IntType
+ that is not covered by any of the Boost.Parser parsers, you can use a more
+ verbose declaration to declare a parser for IntType.
+ If IntType were unsigned,
+ you would use uint_parser.
+ If it were signed, you would use int_parser.
+ For example:
+
constexpr parser_interface<int_parser<IntType>> hex_int; ++
+ uint_parser and int_parser accept three more non-type template
+ parameters after the type parameter. They are Radix,
+ MinDigits, and MaxDigits. Radix
+ defaults to 10, MinDigits to 1,
+ and MaxDigits to -1, which is
+ a sentinel value meaning that there is no max number of digits.
+
+ So, if you wanted to parse exactly eight hexadecimal digits in a row in order
+ to recognize Unicode character literals like C++ has (e.g. \Udeadbeef),
+ you could use this parser for the digits at the end:
+
constexpr parser_interface<uint_parser<unsigned int, 16, 8, 8>> hex_int; ++
+ Boost.Parser has optional support for lexing before parsing. The optional
+ support is based on an external dependency, CTRE.
+ CTRE
+ produces a sequence of tokens by matching a set of regexes that you provide.
+ Each regex is used to match against the input to produce one token with an
+ ID associated with that regex. When you call parse(),
+ you pass it a lazy range of tokens that adapts the input, and parse() parses the tokens, not the
+ underlying characters. When you backtrack, you just move back to an earlier
+ token, not an earlier place in the underlying sequence of characters.
+
+ Let's look at an example of how to do token parsing. First, you must include + the lexer header before the parser header. +
++
+#include <boost/parser/lexer.hpp> +#include <boost/parser/parser.hpp> ++
+
++ The inclusion of this optional header is what enables token parsing. Character + parsing ("normal" parsing) is unaffected by this header inclusion + — you can always do character parsing. +
+![]() |
+Important | +
|---|---|
+ CTRE
+ is a header-only library, and it can be included as a single header. It
+ requires C++20 or later, so Boost.Parser's support for token parsing does
+ as well. Boost.Parser uses the single-header version with Unicode support,
+ |
+ Then, you define a lexer and its tokens. +
++
+auto const foo = bp::token_spec<"foo", 0>; +auto const bar = bp::token_spec<"b.r", 1>; +auto const baz = bp::token_spec<"b.z", 2>; + +auto const lexer = bp::lexer<char, int> | foo | bar | baz; ++
+
+
+ Here, we first see three boost::parser::token_spec_ts. Each one consists
+ of an NTTP
+ regex string literal and an NTTP
+ token ID; the first one matches "foo", and has an
+ ID of 0, etc. boost::parser::lexer_t takes two template
+ parameters. The first parameter indicates that the value type of the parsed
+ input sequence is char. The second one indicates that the ID-type
+ of all subsequent boost::parser::token_spec_ts will be int.
+ We create a full lexer by starting with the lexer<...>
+ expression, followed by a piped-together sequence of boost::parser::token_spec_ts.
+
+ The final lexer <code class="computeroutput">lexer</code> has a combined regex string, <code class="computeroutput">"(foo)|(b.r)|(b.z)"</code>.
+ This string is built up at compile time, and is represented by an NTTP.
+ It is the single regex given to CTRE,
+ which CTRE
+ uses to produce a sequence of matches from it.
+
+ lexer and token_spec are variable templates; they
+ make variables from the templates boost::parser::lexer_t and boost::parser::token_spec_t, respectively.
+ They are provided as a notational convenience, just so you don't have to put
+ {} after every lexer and token spec you write. boost::parser::lexer_t and boost::parser::token_spec_t are empty classes.
+ Their configury is stored in NTTPs.
+
+ Next, you create a range of boost::parser::tokens from your input. This
+ range of tokens is what parse()
+ will parse.
+
+
+auto r = "foobazbar" | bp::to_tokens(lexer); ++
+
+
+ The input must model std::ranges::contiguous_range. This is
+ due to the way CTRE
+ works; it produces a sequence of matches that are convertible to std::basic_string_view<CharType>.
+ In our case, since we are lexing a sequence of char, CTRE
+ will produce a sequence of <code class="computeroutput">std::string_view</code> matches. Note that
+ the value type/character type we specified for boost::parser::lexer_t above must match the
+ input sequence's value type/character type, or the program is ill-formed.
+ Also note that because we are lexing a contiguous range of characters, you
+ cannot use any of the boost::parser::as_utf* range adaptors
+ when doing token parsing.
+
+ Next, you define a parser. +
++
+auto parser = foo >> baz >> bar; ++
+
+
+ This has the same semantics as the character parsers you've seen in the rest
+ of the documentation. Each boost::parser::token_spec_t has the same interface
+ as a parser, so it can be used with all the parser combining operations,
+ like operator>>. However, unlike when doing character
+ parsing, when token parsing all the terminal parsers are restricted to a
+ subset of the terminal parsers that are available in character parsing (see
+ the full list in the table below). This is because most of the parsers in
+ Boost.Parser parse sequences of characters. For example, if you used int_(42)
+ above instead of foo, the int_ parser would try to match
+ two consecutive values from the input sequence, and would expect them to
+ equal '4' and '2', respectively. It would instead
+ see two tokens, and the comparisons would not even compile.
+
+ Finally, you can put everything together in a call to parse().
+
+
+auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == "baz"); +assert(std::get<2>(*result) == "bar"); ++
+
+
+ As you can see, the parse succeeded, and we got three attributes out of it.
+ Each attribute has the type std::string_view.
+
+ Capture groups are valid regex syntax, but you cannot use them in your boost::parser::token_spec_t
+ regexes. For instance, bp::token_spec<"(foo)+", 0>
+ (to match one or more consecutive "foo"s) will compile
+ and run, but you will get garbage results. Boost.Parser relies on the exact
+ number and order of capture groups to do its token generation. If you want
+ to group a part of your regex, use a non-capture group, like "(?:foo)+".
+
+ Using the parser above, what if we tried to parse the token range "foo
+ baz bar" | bp::to_tokens(lexer) instead? Turns out, we get the
+ same answer. You cannot use an explicit skipper when parsing tokens. However,
+ parsers are much simpler when you have a notion of a skipper, especially
+ for whitespace. So, boost::parser::lexer_t has one built in; it
+ uses "\\s+" by default. Whitespace is matched, but
+ produces no tokens. If you want to change the whitespace/skipper regex, you
+ can provide it when specifying the lexer. For example, here is how you would
+ specify the whitespace/skipped tokens to be any sequence of whitespace characters,
+ or any C++-style trailing comment (// ...).
+
bp::lexer<char, int, "\\s+|\\/\\/.*$"> ++
+ If whitespace information is important in your parse, simply provide ""
+ or the more readable convenience constant bp::no_ws to lexer<>
+ as the whitespace regex, and make a regular token that matches whitespace.
+ That way, you'll see all the whitespace in the sequence of tokens that you
+ parse.
+
+ The parser we looked at in the initial simple example produced three std::string_views,
+ one for each token we parsed. However, we may know that a particular token
+ is meant to match numbers. If this is the case, we can let Boost.Parser know
+ that we expect the token to be interpretable as a particular type of numeric
+ value. I'm using "numeric" for brevity, but this includes bool
+ as well. For example:
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; +constexpr auto number = bp::token_spec<"\\d+(?:\\.\\d*)?", 2, double>; ++
+
+
+ The attribute types for these tokens are bool, std::string_view,
+ and double, respectively. identifier has attribute
+ type std::string_view because that is the default if you do
+ not specify a type.
+
+ A boost::parser::token
+ is essentially a variant of std::basic_string_view<CharType>,
+ long long, and long double. The latter two types
+ were selected because they can fit any value of an integral or floating-point
+ type, respectively. Even though boost::parser::token effectively erases the
+ exact type when it is integral or floating-point, the token parser retains
+ the information of what the exact type is. This is why true_false
+ above has an attribute type of bool and not long long.
+
+ CTRE
+ produces a sequence of substrings. Each token produced by Boost.Parser gets
+ its numeric value (if it should have one) by parsing the substring from
+ CTRE
+ with — you guessed it — a Boost.Parser parser. The parser for
+ bool is just bool_; the one for int
+ is int_,
+ etc. The integral-type parsers all support a radix/base. If you specify an
+ integral value type for one of your tokens, you can also specify a base,
+ like bp::token_spec<"\\d+", int, 16> to parse
+ hex-encoded ints.
+
+ Part of the advantage of doing lexing before parsing is that you don't have
+ to reparse everything over and over again. If the subsequence "1.23456789"
+ is found in the input, you only lex it once. After that, it's already in
+ the right form as a floating-point number; backtracking will not provoke
+ reparsing of those ten characters.
+
+ Just about any parser above a certain size will have punctuation of some
+ sort — elements of the input, usually a single character, that delimit
+ other parts of the input, like commas and braces. To make it easier to specify
+ such tokens, Boost.Parser provides boost::parser::token_chars. You can give
+ boost::parser::token_chars
+ a list of individual characters, and it will create a separate, single-character
+ regex for each one, and add it to your lexer. Each such token will have the
+ special ID boost::parser::character_id.
+
+ Note that the single character you provide must be a char in
+ the ASCII range (that is, less than 128). If you want to use
+ a single character that is outside the ASCII range, just make a normal boost::parser::token_spec_t
+ for it. Here is an example using boost::parser::token_chars.
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; + +constexpr auto lexer = + bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>; + +auto parser = identifier >> '=' >> true_false >> ';'; +auto r = "foo = false;" | bp::to_tokens(lexer); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); ++
+
+
+ Just like in a character parser, we can use character literals to match the
+ single-character tokens ('=' and ';' in the example
+ above). The character literals are turned into char_ parsers. char_
+ parsers that you explicitly write may be used as well. They will only match
+ single-character tokens, though (that is, tokens with the ID boost::parser::character_id).
+
+ Even though char_
+ and string() (and lots of other character
+ parsers — see the table below) are available when doing token parsing,
+ their semantics are subtly different when used for token parsing. This is
+ because token parsing involves parsing chunks of input as tokens, rather
+ than individual characters. This may sound obvious, but the implications
+ are not. Consider this example.
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; + +constexpr auto lexer = + bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>; + +auto parser = bp::string("=;"); + +// NOTE: Character parsing here. +auto character_parse_result = bp::parse("=;", parser); +assert(character_parse_result); +assert(*character_parse_result == "=;"); + +// NOTE: Token parsing here. +auto token_parse_result = bp::parse("=;" | bp::to_tokens(lexer), parser); +assert(!token_parse_result); ++
+
+
+ Why doesn't the token parsing case work? In the character parsing case,
+ string
+ tries to match characters from the input, one at a time; it sees '='
+ followed by ';', so it matches. In the token parsing case, this
+ does not happen. Instead, the input is broken up into two tokens (one for
+ '=' and one for ';'). string("=;")
+ tries to match the first token in its entirety, but that token is a character
+ token, not a token with a std::basic_string_view attribute.
+ Even if that token did have a std::basic_string_view attribute,
+ it would be "=", not "=;",
+ and so the match would still fail.
+
+ So, even though string matching is available using string(),
+ make sure you understand that string()
+ is looking for 1) a token with a string view attribute, and 2) a full match
+ of the token's string view against the range provided to string().
+
+ char_
+ is also a bit different, since it only matches character tokens that you
+ make with boost::parser::token_chars.
+ Such tokens have the token ID boost::parser::character_id. char_
+ will never match any other kind of token.
+ This goes for all the character parsers (blank, punct, upper, etc).
+
+ The character class parsers (e.g. punct) are also limited in
+ token parsing vs. their use in character parsing. boost::parser::token_chars limits characters
+ to the ASCII range for simplicity, and to discourage parsing of sequences
+ of tokens to find things that are detectable using PCRE
+ directly. In other words, if you need the full set of punctuation characters,
+ use "\p{P}" in one of your token regexes, rather than
+ trying to parse punctuation characters out of the input using punct.
+ Because boost::parser::token_chars
+ limits characters to the ASCII range, all the matching for any character
+ class parser (like punct)
+ above the ASCII range will fail.
+
![]() |
+Important | +
|---|---|
+ Though the string and character parsers are available, they're a bit clunky
+ and should be avoided in most cases. Instead, use the character handling
+ from the PCRE regex language
+ to make the tokens you want. The best use of string and character parsers
+ in your Boost.Parser token parsers is as literals like |
+ One more important difference between token and character parsing is the
+ effect that using lexeme[] and/or skip[]
+ has. If you use lexeme[] or skip[],
+ you are changing the sequence of tokens that must be in the token cache. As
+ such, whenever you enter or leave
+ a lexeme[] or
+ skip[] directive, the token cache
+ is flushed. The flushed tokens are everything from the current token position
+ to the end of the cache. If you write bp::lexeme[p] frequently
+ enough in your parsers, you could be in for some very uneven performance.
+
![]() |
+Important | +
|---|---|
+ Though you may be used to using |
+ So far, we've only seen examples of parsing for a particular token. Sometimes
+ we want to match only occurrences of a given token with a particular value,
+ just like when we write something like char_('a', 'z') in
+ a character parser.
+
+ Just as with char_
+ and most other Boost.Parser parsers, you can just add the value to match
+ in parens after the token, like true_false(true) or identifier("exact
+ string").
+
+ So far, we've only seen int used as the token ID type. Any integral
+ type or enum can be used, though. There are limitations on the values you
+ can provide for IDs. First, the values must all be nonnegative; negative
+ values are reserved for use by Boost.Parser. Second, the values must not
+ exceed 2^23-1; no one is likely to have very many unique IDs,
+ and token storage can be reduced a bit by using 3 bytes for the ID instead
+ of 4.
+
+ Using an enum has the advantage of making the code a lot clearer. For instance: +
+enum class token_names { foo, bar };
+auto const foo = bp::token_spec<"foo", token_names::foo>;
+auto const bar = bp::token_spec<"b.r", token_names::bar>;
+
+
+ ... reads a lot better than just using IDs like 0 and 1.
+
+ There is another important advantage related to diagnostic messages. Consider + this parse. +
+constexpr auto lexer = bp::lexer<char, token_names> | foo;
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
+
++ Here is what the diagnostic looks like. +
+1:0: error: Expected tok<0> here: +bar +^ ++
+ If we added a specific string value we expect, that would be included. +
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo("foo"));
+
+1:0: error: Expected tok<0>("foo") here:
+bar
+^
+
+
+ Instead of "tok<N>", it might be nice to give
+ the failed expectation a user-friendly name. In character parsers we usually
+ do this by giving rules
+ user-facing diagnostic text. This makes your parse failures much easier to
+ understand and correct. However, many boost::parser::token_spec_ts may already have
+ a nice name, so why not use it? If you use enumerators for your token IDs,
+ and make their enumeration streamable, Boost.Parser will detect this, and
+ use the streamed enumerator instead of "tok<N>".
+ Here is what we could have written instead.
+
enum class printable_tokens { foo, bar };
+std::ostream & operator<<(std::ostream & os, printable_tokens tok)
+{
+ switch (tok) {
+ case printable_tokens::foo: os << "foo"; break;
+ case printable_tokens::bar: os << "bar"; break;
+ }
+ return os;
+}
+
+auto const foo = bp::token_spec<"foo", printable_tokens::foo>;
+auto const bar = bp::token_spec<"b.*r", printable_tokens::bar>;
+
+constexpr auto lexer = bp::lexer<char, printable_tokens> | foo;
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
+
++ That results in the enumerator being printed instead. +
+1:0: error: Expected foo here: +bar +^ ++
![]() |
+Important | +
|---|---|
+ If you provide a streamable enumeration as the token ID type, this enables
+ the alternate printing behavior described above. If you specify a particular
+ value for the token parser, that value is printed as the expected value.
+ So the diagnostic name for |
+ The takeaway here is that you should use a streamable enumeration for your + ID type. It makes your code easier to read, and produces better diagnostics. +
+
+ Given that I told you earlier that we will make a sequence of tokens and
+ backtrack within those tokens, you may be wondering where the tokens are
+ stored. The boost::parser::tokens_view
+ (the type created by the range adaptor boost::parser::to_tokens) uses internal storage
+ or user-provided external storage to store the tokens as they are generated.
+ Here is an example of using external storage.
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; + +constexpr auto lexer = + bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>; + +auto parser = identifier >> '=' >> true_false >> ';'; +std::vector<bp::token<char>> cache; +auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache)); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); +assert(cache.size() == 4u); ++
+
+
+ The cache could have been a boost::container::small_vector<bp::token<char>,
+ N>, or even a static_vector of appropriate size, to
+ reduce or eliminate memory allocations.
+
+ Note the size of the cache after the parse; it still contains some tokens. + This is a special case of a more general phenomenon: the token cache grows + without bound when there are no expectation points. This is because, without + expectation points, backtracking is unbounded (refer to the Expectation + points section to see why). If you can go back arbitrarily far in + order to backtrack, you need to be sure that there will be a token at the + place you backtrack to. +
++ However, if you use expectation points, the cache is trimmed. The prefix + of tokens before the expectation point is erased from the token cache. +
++
+auto parser = identifier >> '=' > true_false >> ';'; +std::vector<bp::token<char>> cache; +auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache)); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); +assert(cache.size() == 2u); ++
+
+
+ Note the use of std::ref() to pass a reference to cache.
+ This is necessary because boost::parser::to_tokens uses std::bind_back()
+ (or a workalike in C++17 mode). As with the other binders in std,
+ it does not gracefully propagate bare lvalue references, so you have to use
+ std::ref().
+
+ Parse failures that fail the top-level parse happen only at expectation points.
+ Lexing failures that fail the top-level parse can happen at any point in
+ the input. If there is no token regex that matches the current point of the
+ input, we cannot continue to lex. Lexing failures are usually caused by bad
+ input, or failure to specify the correct set of boost::parser::token_spec_ts to cover all
+ valid input. However, it may also be that you have written an impossible
+ boost::parser::token_spec_t.
+ Consider this one.
+
constexpr auto bad_token = bp::token_spec<"foo", 0, int>; ++
+ This boost::parser::token_spec_t
+ can never generate a valid token. It will match "foo"
+ in the input, but then it will try to parse "foo"
+ as an int, which is guaranteed to fail.
+
+ The takeaway here is that a lexing failure might be due to bad input, but
+ it can also be the sign of a bug in one or more of your boost::parser::token_spec_ts.
+
+ Many of the parsers that work in character parsing do not work in token parsing, + because they try to parse individual characters from the input. Token parsing + only provides tokens, not characters. This table describes all the parsers + compatible with token parsing. +
+
+ This table lists all the Boost.Parser parsers usable during token parsing.
+ For the callable parsers, a separate entry exists for each possible arity
+ of arguments. For a parser p, if there is no entry for p
+ without arguments, p is a function, and cannot itself be used
+ as a parser; it must be called. In the table below:
+
char");
+ RESOLVE()
+ is a notional macro that expands to the resolution of a parse argument
+ or evaluation of a parse predicate (see The
+ Parsers And Their Uses);
+ "RESOLVE(pred) ==
+ true" is a shorthand notation for "RESOLVE(pred)
+ is contextually convertible to bool and true";
+ likewise for false;
+ c is a character of some character type;
+ str is a string literal of type CharType const[],
+ for some character type CharType;
+ pred is a parse predicate;
+ arg0, arg1, arg2, ... are parse
+ arguments;
+ a is a semantic action;
+ r is an object whose type models parsable_range;
+ tok is a token parser created using boost::parser::token_spec_t; and
+ p, p1, p2, ... are parsers.
+ ![]() |
+Note | +
|---|---|
|
+
+ The definition of + +template<typename T> +concept parsable_range = (std::ranges::forward_range<T> && + code_unit<std::ranges::range_value_t<T>>) || + detail::is_tokens_view_v<T>; ++ + + |
![]() |
+Note | +
|---|---|
+ Some of the parsers in this table consume no input. All parsers consume + the input they match unless otherwise stated in the table below. + |
Table 1.11. Token Parsers and Their Semantics
+|
+ + Parser + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+
+ Matches any token with the same ID as |
+
+
+ The attribute type given when specifying |
++ | +
|
+
+ |
+
+
+ Matches exactly the value |
+
+
+ The attribute type given when specifying |
+
+
+ This case applies only when |
+
|
+
+ |
+
+
+ Matches exactly the value |
+
+
+ The attribute type given when specifying |
+
+ + This overload does not take parse + arguments. + + |
+
|
+
+ |
+
+ + Matches epsilon, the empty string. Always + matches, and consumes no input. + + |
+
+ + None. + + |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Fails to match the input if |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + Matches a single whitespace code point (see note), according to + the Unicode White_Space property. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ properties. |
+
|
+
+ |
+
+ + Matches a single newline (see note), following the "hard" + line breaks in the Unicode line breaking algorithm. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ Line Breaking Algorithm. |
+
|
+
+ |
+
+ + Matches only at the end of input, and consumes no input. + + |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Always matches, and consumes no input. Generates the attribute
+ |
+
+
+ |
+
+
+ An important use case for |
+
|
+
+ |
+
+ + Matches any single code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+
+ Matches exactly the code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
++ + | +
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single control-character code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single decimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single punctuation code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single hexadecimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single lower-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+ + Matches a single upper-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ Only matches tokens with the ID |
+
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ |
+
+
+ |
+
+
+ Unlike the other entries in this table, |
+
+ One directive that works in character parsing does not work in token parsing
+ — the argument form of skip[].
+ The argument to skip[] is a new skipper, and this
+ cannot be changed in the middle of tokenization. The set of tokens and their
+ regexes are fixed at compile time. The nullary form of skip[]
+ works fine; all it does is re-enable skipping that has been turned off by
+ lexeme[].
+
+ Not all the parse() and callback_parse()
+ overloads can do token parsing. In particular, the overloads that take a
+ skipper are precluded, since the skipper must be built into the lexer itself
+ (see the section above about whitespace handling for details).
+
+ There are a few details you might want to know about how CTRE + works. +
++ CTRE + uses PCRE as its regex grammar. +
+
+ "Maximum munch" appears not to be the way CTRE
+ tokenizes input. For instance, if you have boost::parser::token_spec_t A that matches
+ "<==" and boost::parser::token_spec_t B that matches
+ "<|>|<=|>=|==|!=", the input characters
+ "<==" will be tokenized as "<=="
+ if the lexer includes A | B, but will be tokenized as "<"
+ followed by "==" if the lexer includes B | A.
+
+ CTRE
+ uses char32_t for all its compile time strings. If you give
+ it a regex string literal like bp::token_spec<"foo", 0>
+ (that is, an array of char), it will be interpreted in one of
+ two ways. By default, the chars are copied into an array of
+ char32_t, unmodified. This is fine if you provide an ASCII regex,
+ or a regex in a non-Unicode encoding. However, if you define CTRE_STRING_IS_UTF8
+ before including <boost/parser/lexer.hpp>, the array of
+ char will be interpreted as UTF-8, and will be transcoded to
+ UTF-32 before being stored in the array of char32_t. All the
+ charN_t character types will be interpreted as UTF-N encoded,
+ and will be transcoded to UTF-32 if needed. wchar_t is taken
+ to mean UTF-32 even on Windows. Again, all
+ of this transcoding happens at compile time.
+
+ Error handling during token parsing mostly Just Works. That is, you don't + need to know or do anything special just because you are parsing tokens. +
+
+ However, the error reporting functions all operate at the level of character
+ input, not tokens. The higher level functions provided in error_handling_fwd.hpp
+ and error_handling.hpp
+ (like write_formatted_message()) simply get the iterators to
+ the underlying range of input before doing their work. The lower-level functions
+ provided in error_handling_fwd.hpp
+ and error_handling.hpp
+ (like find_line_position()) do not. Each function's API documentation
+ specifies whether or not it does this "normalization" to underlying
+ iterators. If you use the lower-level API directly in your code, you can
+ call one of the overloads of normalize_iterators() to get the
+ underlying iterators in the token parsing case.
+
+ Boost.Parser was designed from the start to be Unicode friendly. There are
+ numerous references to the "Unicode code path" and the "non-Unicode
+ code path" in the Boost.Parser documentation. Though there are in fact
+ two code paths for Unicode and non-Unicode parsing, the code is not very
+ different in the two code paths, as they are written generically. The only
+ difference is that the Unicode code path parses the input as a range of code
+ points, and the non-Unicode path does not. In effect, this means that, in
+ the Unicode code path, when you call parse(r, p) for some input range r
+ and some parser p, the parse
+ happens as if you called parse(r | boost::parser::as_utf32, p)
+ instead. (Of course, it does not matter if r
+ is a proper range, or an iterator/sentinel pair; those both work fine with
+ boost::parser::as_utf32.)
+
+ Matching "characters" within Boost.Parser's parsers is assumed
+ to be a code point match. In the Unicode path there is a code point from
+ the input that is matched to each char_ parser. In the non-Unicode
+ path, the encoding is unknown, and so each element of the input is considered
+ to be a whole "character" in the input encoding, analogous to a
+ code point. From this point on, I will therefore refer to a single element
+ of the input exclusively as a code point.
+
+ So, let's say we write this parser: +
+constexpr auto char8_parser = boost::parser::char_('\xcc'); ++
+ For any char_
+ parser that should match a value or values, the type of the value to match
+ is retained. So char8_parser
+ contains a char that it will
+ use for matching. If we had written:
+
constexpr auto char32_parser = boost::parser::char_(U'\xcc'); ++
+ char32_parser would instead
+ contain a char32_t that it would
+ use for matching.
+
+ So, at any point during the parse, if char8_parser
+ were being used to match a code point next_cp
+ from the input, we would see the moral equivalent of next_cp
+ == '\xcc',
+ and if char32_parser were
+ being used to match next_cp,
+ we'd see the equivalent of next_cp
+ == U'\xcc'. The take-away here is that you can write
+ char_
+ parsers that match specific values, without worrying if the input is Unicode
+ or not because, under the covers, what takes place is a simple comparison
+ of two integral values.
+
![]() |
+Note | +
|---|---|
+ Boost.Parser actually promotes any two values to a common type using |
+ Since matches are always done at a code point level (remember, a "code
+ point" in the non-Unicode path is assumed to be a single char), you get different results trying to
+ match UTF-8 input in the Unicode and non-Unicode code paths:
+
namespace bp = boost::parser; + +{ + std::string str = (char const *)u8"\xcc\x80"; // encodes the code point U+0300 + auto first = str.begin(); + + // Since we've done nothing to indicate that we want to do Unicode + // parsing, and we've passed a range of char to parse(), this will do + // non-Unicode parsing. + std::string chars; + assert(bp::parse(first, str.end(), *bp::char_('\xcc'), chars)); + + // Finds one match of the *char* 0xcc, because the value in the parser + // (0xcc) was matched against the two code points in the input (0xcc and + // 0x80), and the first one was a match. + assert(chars == "\xcc"); +} +{ + std::u8string str = u8"\xcc\x80"; // encodes the code point U+0300 + auto first = str.begin(); + + // Since the input is a range of char8_t, this will do Unicode + // parsing. The same thing would have happened if we passed + // str | boost::parser::as_utf32 or even str | boost::parser::as_utf8. + std::string chars; + assert(bp::parse(first, str.end(), *bp::char_('\xcc'), chars)); + + // Finds zero matches of the *code point* 0xcc, because the value in + // the parser (0xcc) was matched against the single code point in the + // input, 0x0300. + assert(chars == ""); +} ++
+ Additionally, it is expected that most programs will use UTF-8 for the encoding
+ of Unicode strings. Boost.Parser is written with this typical case in mind.
+ This means that if you are parsing 32-bit code points (as you always are
+ in the Unicode path), and you want to catch the result in a container C of char
+ or char8_t values, Boost.Parser
+ will silently transcode from UTF-32 to UTF-8 and write the attribute into
+ C. This means that std::string,
+ std::u8string, etc. are fine to use as attribute
+ out-parameters for *char_, and the result
+ will be UTF-8.
+
![]() |
+Note | +
|---|---|
+ UTF-16 strings as attributes are not supported directly. If you want to
+ use UTF-16 strings as attributes, you may need to do so by transcoding
+ a UTF-8 or UTF-32 attribute to UTF-16 within a semantic action. You can
+ do this by using |
+ The treatment of strings as UTF-8 is nearly ubiquitous within Boost.Parser.
+ For instance, though the entire interface of symbols uses std::string
+ or std::string_view, UTF-32 comparisons are used
+ internally.
+
+ I mentioned above that the use of boost::parser::utf*_view as the range to parse opts you in
+ to Unicode parsing. Here's a bit more about these views and how best to use
+ them.
+
+ If you want to do Unicode parsing, you're always going to be comparing code + points at each step of the parse. As such, you're going to implicitly convert + any parse input to UTF-32, if needed. This is what all the parse API functions + do internally. +
+
+ However, there are times when you have parse input that is a sequence of
+ UTF-8-encoded chars, and you
+ want to do Unicode-aware parsing. As mentioned previously, Boost.Parser has
+ a special case for char inputs,
+ and it will not assume that char sequences are UTF-8. If you want to tell
+ the parse API to do Unicode processing on them anyway, you can use the as_utf32 range adaptor. (Note that you
+ can use any of the as_utf* adaptors and the semantics will not differ
+ from the semantics below.)
+
namespace bp = boost::parser; + +auto const p = '"' >> *(bp::char_ - '"' - 0xb6) >> '"'; +char const * str = "\"two wörds\""; // ö is two code units, 0xc3 0xb6 + +auto result_1 = bp::parse(str, p); // Treat each char as a code point (typically ASCII). +assert(!result_1); +auto result_2 = bp::parse(str | bp::as_utf32, p); // Unicode-aware parsing on code points. +assert(result_2); ++
+ The first call to parse()
+ treats each char as a code point,
+ and since "ö" is the
+ pair of code units 0xc3 0xb6, the parse matches the second code unit
+ against the - 0xb6
+ part of the parser above, causing the parse to fail. This happens because
+ each code unit/char in str is treated as an independent code point.
+
+ The second call to parse()
+ succeeds because, when the parse gets to the code point for 'ö', it is 0xf6
+ (U+00F6), which does not match the -
+ 0xb6 part of the parser.
+
+ The other adaptors as_utf8
+ and as_utf16 are also provided
+ for completeness, if you want to use them. They each can transcode any sequence
+ of character types.
+
![]() |
+Important | +
|---|---|
+ The |
+ One thing that Boost.Parser does not handle for you is normalization; Boost.Parser + is completely normalization-agnostic. Since all the parsers do their matching + using equality comparisons of code points, you should make sure that your + parsed range and your parsers all use the same normalization form. +
+
+ Boost.Parser has optional support for lexing before parsing. The optional
+ support is based on an external dependency, CTRE.
+ CTRE
+ produces a sequence of tokens by matching a set of regexes that you provide.
+ Each regex is used to match against the input to produce one token with an
+ ID associated with that regex. When you call parse(),
+ you pass it a lazy range of tokens that adapts the input, and parse() parses the tokens, not the
+ underlying characters. When you backtrack, you just move back to an earlier
+ token, not an earlier place in the underlying sequence of characters.
+
+ Let's look at an example of how to do token parsing. First, you must include + the lexer header before the parser header. +
++
+#include <boost/parser/lexer.hpp> +#include <boost/parser/parser.hpp> ++
+
++ The inclusion of this optional header is what enables token parsing. Character + parsing ("normal" parsing) is unaffected by this header inclusion + — you can always do character parsing. +
+![]() |
+Important | +
|---|---|
+ CTRE
+ is a header-only library, and it can be included as a single header. It
+ requires C++20 or later; Boost.Parser's support for token parsing does
+ as well. Boost.Parser uses the single-header version with Unicode support,
+ |
+ Then, you define a lexer and its tokens. +
++
+auto const foo = bp::token_spec<"foo", 0>; +auto const bar = bp::token_spec<"b.r", 1>; +auto const baz = bp::token_spec<"b.z", 2>; + +auto const lexer = bp::lexer<char, int> | foo | bar | baz; ++
+
+
+ Here, we first see three boost::parser::token_spec_ts. Each one consists
+ of an NTTP
+ regex string literal and an NTTP
+ token ID; the first one matches "foo", and has an
+ ID of 0, etc. boost::parser::lexer_t takes two template
+ parameters. The first parameter indicates that the value type of the parsed
+ input sequence is char. The second one indicates that the ID-type
+ of all subsequent boost::parser::token_spec_ts will be int.
+ We create a full lexer by starting with the lexer<...>
+ expression, followed by a piped-together sequence of boost::parser::token_spec_ts.
+
+ The final lexer lexer has a combined regex string, "(foo)|(b.r)|(b.z)".
+ This string is built up at compile time, and is represented by an NTTP.
+ It is the single regex given to CTRE,
+ which CTRE
+ uses to produce a sequence of matches from it.
+
+ lexer and token_spec are variable templates; they
+ make variables from the templates boost::parser::lexer_t and boost::parser::token_spec_t, respectively.
+ They are provided as a notational convenience, just so you don't have to put
+ {} after every lexer and token spec you write. boost::parser::lexer_t and boost::parser::token_spec_t are empty classes.
+ Their configury is stored in NTTPs.
+
+ Next, you create a range of boost::parser::tokens from your input. This
+ range of tokens is what parse()
+ will parse.
+
+
+auto r = "foobazbar" | bp::to_tokens(lexer); ++
+
+
+ The input must model std::ranges::contiguous_range. This is
+ due to the way CTRE
+ works; it produces a sequence of matches that are convertible to std::basic_string_view<CharType>.
+ In our case, since we are lexing a sequence of char, CTRE
+ will produce a sequence of std::string_view matches. Note that
+ the value type/character type we specified for boost::parser::lexer_t above must match the
+ input sequence's value type/character type, or the program is ill-formed.
+ Also note that because we are lexing a contiguous range of characters, you
+ cannot use any of the boost::parser::as_utf* range adaptors
+ when doing token parsing.
+
+ Next, you define a parser. +
++
+auto parser = foo >> baz >> bar; ++
+
+
+ This has the same semantics as the character parsers you've seen in the
+ rest of the documentation. Each boost::parser::token_spec_t has the same interface
+ as a parser, so it can be used with all the parser combining operations,
+ like operator>>. However, unlike when doing character
+ parsing, when token parsing all the terminal parsers are restricted to a
+ subset of the terminal parsers that are available in character parsing (see
+ the full list in the table below). This is because most of the parsers in
+ Boost.Parser parse sequences of characters. For example, if you used int_(42)
+ above instead of foo, the int_ parser would try to match
+ two consecutive values from the input sequence, and would expect them to
+ equal '4' and '2', respectively. It would instead
+ see two tokens, and the comparisons would not even compile.
+
+ Finally, you can put everything together in a call to parse().
+
+
+auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == "baz"); +assert(std::get<2>(*result) == "bar"); ++
+
+
+ As you can see, the parse succeeded, and we got three attributes out of it.
+ Each attribute has the type std::string_view.
+
+ Capture groups are valid regex syntax, but you cannot use them in your boost::parser::token_spec_t
+ regexes. For instance, bp::token_spec<"(foo)+", 0>
+ (to match one or more consecutive "foo"s) will compile
+ and run, but you will get garbage results. Boost.Parser relies on the exact
+ number and order of capture groups to do its token generation. If you want
+ to group a part of your regex, use a non-capture group, like "(?:foo)+".
+
+ Using the parser above, what if we tried to parse the token range "foo
+ baz bar" | bp::to_tokens(lexer) instead? Turns out, we get the
+ same answer. You cannot use an explicit skipper when parsing tokens. However,
+ parsers are much simpler when you have a notion of a skipper, especially
+ for whitespace. So, boost::parser::lexer_t has one built in; it
+ uses "\\s+" by default. Whitespace is matched, but
+ produces no tokens. If you want to change the whitespace/skipper regex, you
+ can provide it when specifying the lexer. For example, here is how you would
+ specify the whitespace/skipped tokens to be any sequence of whitespace characters,
+ or any C++-style trailing comment (// ...).
+
bp::lexer<char, int, "\\s+|\\/\\/.*$"> ++
+ If whitespace information is important in your parse, simply provide ""
+ or the more readable convenience constant bp::no_ws to lexer<>
+ as the whitespace regex, and make a regular token that matches whitespace.
+ That way, you'll see all the whitespace in the sequence of tokens that you
+ parse.
+
+ The parser we looked at in the initial simple example produced three std::string_views,
+ one for each token we parsed. However, we may know that a particular token
+ is meant to match numbers. If this is the case, we can let Boost.Parser know
+ that we expect the token to be interpretable as a particular type of numeric
+ value. I'm using "numeric" for brevity, but this includes bool
+ as well. For example:
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; +constexpr auto number = bp::token_spec<"\\d+(?:\\.\\d*)?", 2, double>; ++
+
+
+ The attribute types for these tokens are bool, std::string_view,
+ and double, respectively. identifier has attribute
+ type std::string_view because that is the default if you do
+ not specify a type.
+
+ A boost::parser::token
+ is essentially a variant of std::basic_string_view<CharType>,
+ long long, and long double. The latter two types
+ were selected because they can fit any value of an integral or floating-point
+ type, respectively. Even though boost::parser::token effectively erases the
+ exact type when it is integral or floating-point, the token parser retains
+ the information of what the exact type is. This is why true_false
+ above has an attribute type of bool and not long long.
+
+ CTRE
+ produces a sequence of substrings. Each token produced by Boost.Parser gets
+ its numeric value (if it should have one) by parsing the substring from
+ CTRE
+ with — you guessed it — a Boost.Parser parser. The parser for
+ bool is just bool_; the one for int
+ is int_,
+ etc. The integral-type parsers all support a radix/base. If you specify an
+ integral value type for one of your tokens, you can also specify a base,
+ like bp::token_spec<"\\d+", 0, int, 16> to parse
+ hex-encoded ints.
+
+ Part of the advantage of doing lexing before parsing is that you don't have
+ to reparse everything over and over again. If the subsequence "1.23456789"
+ is found in the input, you only lex it once. After that, it's already in
+ the right form as a floating-point number; backtracking will not provoke
+ reparsing of those ten characters.
+
+ Just about any parser above a certain size will have punctuation of some
+ sort — elements of the input, usually a single character, that delimit
+ other parts of the input, like commas and braces. To make it easier to specify
+ such tokens, Boost.Parser provides boost::parser::token_chars. You can give
+ boost::parser::token_chars
+ a list of individual characters, and it will create a separate, single-character
+ regex for each one, and add it to your lexer. Each such token will have the
+ special ID boost::parser::character_id.
+
+ Note that the single character you provide must be a char in
+ the ASCII range (that is, less than 128). If you want to use
+ a single character that is outside the ASCII range, just make a normal boost::parser::token_spec_t
+ for it. Here is an example using boost::parser::token_chars.
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; + +constexpr auto lexer = + bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>; + +auto parser = identifier >> '=' >> true_false >> ';'; +auto r = "foo = false;" | bp::to_tokens(lexer); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); ++
+
+
+ Just like in a character parser, we can use character literals to match the
+ single-character tokens ('=' and ';' in the example
+ above). The character literals are turned into char_ parsers. char_
+ parsers that you explicitly write may be used as well. They will only match
+ single-character tokens, though (that is, tokens with the ID boost::parser::character_id).
+
+ So far, we've only seen examples of parsing for a particular token. Sometimes
+ we want to match only occurrences of a given token with a particular value,
+ just like when we write something like char_('a', 'z') in
+ a character parser.
+
+ Just as with char_
+ and most other Boost.Parser parsers, you can just add the value to match
+ in parens after the token, like true_false(true) or identifier("exact
+ string").
+
+ So far, we've only seen int used as the token ID type. Any integral
+ type or enum can be used, though. There are limitations on the values you
+ can provide for IDs. First, the values must all be nonnegative; negative
+ values are reserved for use by Boost.Parser. Second, the values must not
+ exceed 2^23-1; no one is likely to have very many unique IDs,
+ and token storage can be reduced a bit by using 3 bytes for the ID instead
+ of 4.
+
+ Using an enum has the advantage of making the code a lot clearer. For instance: +
+enum class token_names { foo, bar };
+auto const foo = bp::token_spec<"foo", token_names::foo>;
+auto const bar = bp::token_spec<"b.r", token_names::bar>;
+
+
+ ... reads a lot better than just using IDs like 0 and 1.
+
+ There is another important advantage related to diagnostic messages. Consider + this parse. +
+constexpr auto lexer = bp::lexer<char, token_names> | foo;
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
+
++ Here is what the diagnostic looks like. +
+1:0: error: Expected tok<0> here: +bar +^ ++
+ If we added a specific string value we expect, that would be included. +
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo("foo"));
+
+1:0: error: Expected tok<0>("foo") here:
+bar
+^
+
+
+ Instead of "tok<N>", it might be nice to give
+ the failed expectation a user-friendly name. In character parsers we usually
+ do this by giving rules
+ user-facing diagnostic text. This makes your parse failures much easier to
+ understand and correct. However, many boost::parser::token_spec_ts may already have
+ a nice name, so why not use it? If you use enumerators for your token IDs,
+ and make their enumeration streamable, Boost.Parser will detect this, and
+ use the streamed enumerator instead of "tok<N>".
+ Here is what we could have written instead.
+
enum class printable_tokens { foo, bar };
+std::ostream & operator<<(std::ostream & os, printable_tokens tok)
+{
+ switch (tok) {
+ case printable_tokens::foo: os << "foo"; break;
+ case printable_tokens::bar: os << "bar"; break;
+ }
+ return os;
+}
+
+auto const foo = bp::token_spec<"foo", printable_tokens::foo>;
+auto const bar = bp::token_spec<"b.*r", printable_tokens::bar>;
+
+constexpr auto lexer = bp::lexer<char, printable_tokens> | foo;
+bp::parse("bar" | bp::to_tokens(lexer), bp::eps > foo);
+
++ That results in the enumerator being printed instead. +
+1:0: error: Expected foo here: +bar +^ ++
![]() |
+Important | +
|---|---|
+ If you provide a streamable enumeration as the token ID type, this enables
+ the alternate printing behavior described above. If you specify a particular
+ value for the token parser, that value is printed as the expected value.
+ So the diagnostic name for |
+ The takeaway here is that you should use a streamable enumeration for your + ID type. It makes your code easier to read, and produces better diagnostics. +
+
+ Given that I told you earlier that we will make a sequence of tokens and
+ backtrack within those tokens, you may be wondering where the tokens are
+ stored. The boost::parser::tokens_view
+ (the type created by the range adaptor boost::parser::to_tokens) uses internal storage
+ or user-provided external storage to store the tokens as they are generated.
+ Here is an example of using external storage.
+
+
+constexpr auto true_false = bp::token_spec<"true|false", 0, bool>; +constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>; + +constexpr auto lexer = + bp::lexer<char, int> | true_false | identifier | bp::token_chars<'=', ';'>; + +auto parser = identifier >> '=' >> true_false >> ';'; +std::vector<bp::token<char>> cache; +auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache)); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); +assert(cache.size() == 4u); ++
+
+
+ The cache could have been a boost::container::small_vector<bp::token<char>,
+ N>, or even a static_vector of appropriate size, to
+ reduce or eliminate memory allocations.
+
+ Note the size of the cache after the parse; it still contains some tokens. + This is a special case of a more general phenomenon: the token cache grows + without bound when there are no expectation points. This is because, without + expectation points, backtracking is unbounded (refer to the Expectation + points section to see why). If you can go back arbitrarily far in order + to backtrack, you need to be sure that there will be a token at the place + you backtrack to. +
++ However, if you use expectation points, the cache is trimmed. The prefix + of tokens before the expectation point is erased from the token cache. +
++
+auto parser = identifier >> '=' > true_false >> ';'; +std::vector<bp::token<char>> cache; +auto r = "foo = false;" | bp::to_tokens(lexer, std::ref(cache)); +auto result = bp::parse(r, parser); +assert(result); +assert(std::get<0>(*result) == "foo"); +assert(std::get<1>(*result) == false); +assert(cache.size() == 2u); ++
+
+
+ Note the use of std::ref() to pass a reference to cache.
+ This is necessary because boost::parser::to_tokens uses std::bind_back()
+ (or a workalike in C++17 mode). As with the other binders in std,
+ it does not gracefully propagate bare lvalue references, so you have to use
+ std::ref().
+
+ Parse failures that fail the top-level parse happen only at expectation points.
+ Lexing failures that fail the top-level parse can happen at any point in
+ the input. If there is no token regex that matches the current point of the
+ input, we cannot continue to lex. Lexing failures are usually caused by bad
+ input, or failure to specify the correct set of boost::parser::token_spec_ts to cover all
+ valid input. However, it may also be that you have written an impossible
+ boost::parser::token_spec_t.
+ Consider this one.
+
constexpr auto bad_token = bp::token_spec<"foo", 0, int>; ++
+ This boost::parser::token_spec_t
+ can never generate a valid token. It will match "foo"
+ in the input, but then it will try to parse "foo"
+ as an int, which is guaranteed to fail.
+
+ The takeaway here is that a lexing failure might be due to bad input, but
+ it can also be the sign of a bug in one or more of your boost::parser::token_spec_ts.
+
+ TODO +
++ Many of the parsers that work in character parsing do not work in token parsing, + because they try to parse individual characters from the input. Token parsing + only provides tokens, not characters. This table describes all the parsers + compatible with token parsing. +
+
+ This table lists all the Boost.Parser parsers usable during token parsing.
+ For the callable parsers, a separate entry exists for each possible arity
+ of arguments. For a parser p, if there is no entry for p
+ without arguments, p is a function, and cannot itself be used
+ as a parser; it must be called. In the table below:
+
char");
+ RESOLVE()
+ is a notional macro that expands to the resolution of parse argument
+ or evaluation of a parse predicate (see The
+ Parsers And Their Uses);
+ RESOLVE(pred) ==
+ true" is a shorthand notation for "RESOLVE(pred)
+ is contextually convertible to bool and true";
+ likewise for false;
+ c is a character of some character type;
+ str is a string literal of type CharType const[],
+ for some character type CharType;
+ pred is a parse predicate;
+ arg0, arg1, arg2, ... are parse
+ arguments;
+ a is a semantic action;
+ r is an object whose type models parsable_range;
+ tok is a token parser created using boost::parser::token_spec_t; and
+ p, p1, p2, ... are parsers.
+ ![]() |
+Note | +
|---|---|
|
+
+ The definition of + +template<typename T> +concept parsable_range = (std::ranges::forward_range<T> && + code_unit<std::ranges::range_value_t<T>>) || + detail::is_tokens_view_v<T>; ++ + + |
![]() |
+Note | +
|---|---|
+ Some of the parsers in this table consume no input. All parsers consume + the input they match unless otherwise stated in the table below. + |
Table 1.11. Token Parsers and Their Semantics
+|
+ + Parser + + |
+
+ + Semantics + + |
+
+ + Attribute Type + + |
+
+ + Notes + + |
+
|---|---|---|---|
|
+
+ |
+
+
+ Matches any token with the same ID as |
+
+
+ The attribute type given when specifying |
++ | +
|
+
+ |
+
+
+ Matches exactly the value |
+
+
+ The attribute type given when specifying |
+
+
+ This case applies only when |
+
|
+
+ |
+
+
+ Matches exactly the value |
+
+
+ The attribute type given when specifying |
+
+ + This overload does not take parse + arguments. + + |
+
|
+
+ |
+
+ + Matches epsilon, the empty string. Always + matches, and consumes no input. + + |
+
+ + None. + + |
+
+
+ Matching |
+
|
+
+ |
+
+
+ Fails to match the input if |
+
+ + None. + + |
++ | +
|
+
+ |
+
+ + Matches a single whitespace code point (see note), according to + the Unicode White_Space property. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ properties. |
+
|
+
+ |
+
+ + Matches a single newline (see note), following the "hard" + line breaks in the Unicode line breaking algorithm. + + |
+
+ + None. + + |
+
+
+ For more info, see the Unicode
+ Line Breaking Algorithm. |
+
|
+
+ |
+
+ + Matches only at the end of input, and consumes no input. + + |
+
+ + None. + + |
++ | +
|
+
+ |
+
+
+ Always matches, and consumes no input. Generates the attribute
+ |
+
+
+ |
+
+
+ An important use case for |
+
|
+
+ |
+
+ + Matches any single code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches the next code point |
+
+
+ The code point type in Unicode parsing, or |
+
+
+ |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
+
+ + Matches a single code point. + + |
+
+
+ |
+
+
+ Similar to |
+
|
+
+ |
++ + | +
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single control-character code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single decimal digit code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single punctuation code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ Matches a single hexadecimal digit code point. + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single lower-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+ + Matches a single upper-case code point. + + |
+
+
+ The code point type in Unicode parsing, or |
++ | +
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given code point |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly the given string |
+
+ + None. + + |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ |
+
|
+
+ |
+
+
+ Matches exactly |
+
+
+ |
+
+
+ This is a UDL
+ that represents |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Matches iff |
+
+
+ |
+
+
+ The special value |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ Equivalent to |
+
+
+ |
+
+
+ It is an error to write |
+
|
+
+ |
+
+
+ |
+
+
+ |
+
+
+ Unlike the other entries in this table, |
+
+ One directive that works in character parsing does not work in token parsing
+ — the argument form of skip[].
+ The argument to skip[] is a new skipper, and this
+ cannot be changed in the middle of tokenization. The set of tokens and their
+ regexes are fixed at compile time. The nullary form of skip[]
+ works fine; all it does is re-enable skipping that has been turned off by
+ lexeme[].
+
+ Not all the parse() and callback_parse()
+ overloads can do token parsing, because some of them cannot accept a boost::parser::tokens_view
+ as input. In particular, the overloads that take a skipper are precluded,
+ since the skipper must be built into the lexer itself (see the section above
+ about whitespace handling for details).
+
+ There are a few details you might want to know about how CTRE + works. +
++ CTRE + uses PCRE as its regex grammar. +
+
+ "Maximum munch" appears not to be the way CTRE
+ tokenizes input. For instance, if you have boost::parser::token_spec_t A that matches
+ "<==" and boost::parser::token_spec_t B that matches
+ "<|>|<=|>=|==|!=", the input characters
+ "<==" will be tokenized as "<=="
+ if the lexer includes A | B, but will be parsed as "<"
+ followed by "==" if the lexer includes B | A.
+
+ CTRE
+ uses char32_t for all its compile time strings. If you give
+ it a regex string literal like bp::token_spec<"foo", 0>
+ (that is, an array of char), it will be interpreted in one of
+ two ways. By default, the chars are copied into an array of
+ char32_t, unmodified. This is fine if you provide an ASCII regex,
+ or a regex in a non-Unicode encoding. However, if you define CTRE_STRING_IS_UTF8
+ before including <boost/parser/lexer.hpp>, the array of
+ char will be interpreted as UTF-8, and will be transcoded to
+ UTF-32 before being stored in the array of char32_t. All the
+ charN_t character types will be interpreted as UTF-N encoded,
+ and will be transcoded to UTF-32 if needed. wchar_t is taken
+ to mean UTF-32 even on Windows. Again, all
+ of this transcoding happens at compile time.
+
+ TODO: Describe how it mostly just works, but that if you use the error reporting + API you need to know which functions require token iterators and which do + not, and how to get from token iterators down to the underlying input iterators. +
++ TODO: Note on the error handling-specific page that some error handling functions + require normalize_iterators, and some apply it themselves. Also note that + all the error handlers apply it. +
+
+ You should probably never need to write your own low-level parser. You have
+ primitives like char_
+ from which to build up the parsers that you need. It is unlikely that you're
+ going to need to do things on a lower level than a single character.
+
+ However. Some people are obsessed with writing everything for themselves. + We call them C++ programmers. This section is for them. However, this section + is not an in-depth tutorial. It is a basic orientation to get you familiar + enough with all the moving parts of writing a parser that you can then learn + by reading the Boost.Parser code. +
+
+ Each parser must provide two overloads of a function call().
+ One overload parses, producing an attribute (which may be the special no-attribute
+ type detail::nope). The other one parses, filling in a given
+ attribute. The type of the given attribute is a template parameter, so it
+ can take any type that you can form a reference to.
+
+ Let's take a look at a Boost.Parser parser, opt_parser. This
+ is the parser produced by use of operator-. First, here is the
+ beginning of its definition.
+
+
+template<typename Parser> +struct opt_parser +{ ++
+
++ The end of its definition is: +
++
+Parser parser_; +}; ++
+
+
+ As you can see, opt_parser's only data member is the parser
+ it adapts, parser_. Here is its attribute-generating overload
+ to call().
+
+
+template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser> +auto call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success) const +{ + using attr_t = decltype(parser_.call( + first, last, context, skip, flags, success)); + detail::optional_of<attr_t> retval; + call(first, last, context, skip, flags, success, retval); + return retval; +} ++
+
++ First, let's look at the template and function parameters. +
+Iter & first is the iterator. It is taken as an out-param.
+ It is the responsibility of call() to advance first
+ if and only if the parse succeeds.
+ Sentinel last is the sentinel. If the parse has not yet
+ succeeded within call(), and first == last
+ is true, call() must fail (by setting bool
+ & success to false).
+ Context const & context is the parse context. It will
+ be some specialization of detail::parse_context. The context
+ is used in any call to a subparser's call(), and in some
+ cases a new context should be created, and the new context passed to
+ a subparser instead; more on that below.
+ SkipParser const & skip is the current skip parser.
+ skip should be used at the beginning of the parse, and in
+ between any two uses of any subparser(s).
+ detail::flags flags are a collection of flags indicating
+ various things about the current state of the parse. flags
+ is concerned with whether to produce attributes at all; whether to apply
+ the skip parser skip; whether to produce a verbose trace
+ (as when boost::parser::trace::on
+ is passed at the top level); and whether we are currently inside the
+ utility function detail::apply_parser.
+ bool & success is the final function parameter. It should
+ be set to true if the parse succeeds, and false
+ otherwise.
+
+ Now the body of the function. Notice that it just dispatches to the other
+ call() overload. This is really common, since both overloads
+ need to do the same parsing; only the attribute may differ. The first line
+ of the body defines attr_t, the default attribute type of our
+ wrapped parser parser_. It does this by getting the decltype()
+ of a use of parser_.call(). (This is the logic represented by
+ ATTR()
+ in the rest of the documentation.) Since opt_parser represents
+ an optional value, the natural type for its attribute is std::optional<ATTR(parser)>.
+ However, this does not work for all cases. In particular, it does not work
+ for the "no-attribute" type detail::nope, nor for
+ std::optional<T> — ATTR(--p)
+ is just ATTR(-p). So,
+ the second line uses an alias that takes care of those details, detail::optional_of<>.
+ The third line just calls the other overload of call(), passing
+ retval as the out-param. Finally, retval is returned
+ on the last line.
+
+ Now, on to the other overload. +
++
+template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser, + typename Attribute> +void call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success, + Attribute & retval) const +{ + [[maybe_unused]] auto _ = detail::scoped_trace( + *this, first, last, context, flags, retval); + + detail::skip(first, last, skip, flags); + + if (!detail::gen_attrs(flags)) { + parser_.call(first, last, context, skip, flags, success); + success = true; + return; + } + + parser_.call(first, last, context, skip, flags, success, retval); + success = true; +} ++
+
+
+ The template and function parameters here are identical to the ones from
+ the other overload, except that we have Attribute & retval,
+ our out-param.
+
+ Let's look at the implementation a bit at a time. +
++
+[[maybe_unused]] auto _ = detail::scoped_trace( + *this, first, last, context, flags, retval); ++
+
+
+ This defines a RAII trace object that will produce the verbose trace requested
+ by the user if they passed boost::parser::trace::on to the top-level
+ parse. It only has effect if detail::enable_trace(flags) is
+ true. If trace is enabled, it will show the state of the parse
+ at the point at which it is defined, and then again when it goes out of scope.
+
![]() |
+Important | +
|---|---|
+ For the tracing code to work, you must define an overload of |
+
+detail::skip(first, last, skip, flags); ++
+
+
+ This one is pretty simple; it just applies the skip parser. opt_parser
+ only has one subparser, but if it had more than one, or if it had one that
+ it applied more than once, it would need to repeat this line using skip
+ between every pair of uses of any subparser.
+
+
+if (!detail::gen_attrs(flags)) { + parser_.call(first, last, context, skip, flags, success); + success = true; + return; +} ++
+
+
+ This path accounts for the case where we don't want to generate attributes
+ at all, perhaps because this parser sits inside an omit[]
+ directive.
+
+
+parser_.call(first, last, context, skip, flags, success, retval); +success = true; ++
+
+
+ This is the other, typical, path. Here, we do want to generate attributes,
+ and so we do the same call to parser_.call(), except that we
+ also pass retval.
+
+ Note that we set success to true after the call
+ to parser_.call() in both code paths. Since opt_parser
+ is zero-or-one, if the subparser fails, opt_parser still succeeds.
+
+ Sometimes, you need to change something about the parse context before calling
+ a subparser. For instance, rule_parser sets up the value, locals,
+ etc., that are available for that rule. action_parser adds the
+ generated attribute to the context (available as _attr(ctx)).
+ Contexts are immutable in Boost.Parser. To "modify" one for a subparser,
+ you create a new one with the appropriate call to detail::make_context().
+
detail::apply_parser()
+
+ Sometimes a parser needs to operate on an out-param that is not exactly the
+ same as its default attribute, but that is compatible in some way. To do
+ this, it's often useful for the parser to call itself, but with slightly
+ different parameters. detail::apply_parser() helps with this.
+ See the out-param overload of repeat_parser::call() for an example.
+ Note that since this creates a new scope for the ersatz parser, the scoped_trace
+ object needs to know whether we're inside detail::apply_parser
+ or not.
+
+ That's a lot, I know. Again, this section is not meant to be an in-depth
+ tutorial. You know enough now that the parsers in parser.hpp
+ are at least readable.
+
+ +BOOST_PARSER_NO_RUNTIME_ASSERTIONS +BOOST_PARSER_ASSERT(condition) +BOOST_PARSER_DISABLE_CONCEPTS +BOOST_PARSER_USE_HANA_TUPLE +BOOST_PARSER_MAX_AGGREGATE_SIZE +BOOST_PARSER_SUBRANGE +BOOST_PARSER_TRACE_TO_VS_OUTPUT +BOOST_PARSER_TOKEN_POSITION_TYPE +BOOST_PARSER_ALGO_CONSTEXPR +BOOST_PARSER_USE_CONCEPTS +BOOST_PARSER_USE_STD_TUPLE +BOOST_PARSER_CONSTEXPR +BOOST_PARSER_TRACE_OSTREAM +BOOST_PARSER_DIAGNOSTIC_PUSH +BOOST_PARSER_DIAGNOSTIC_POP+
namespace boost { + namespace parser { + struct default_error_handler; + template<typename Iter> struct lex_error; + template<typename Iter> struct line_position; + template<typename Iter> struct parse_error; + struct stream_error_handler; + + enum error_handler_result; + enum diagnostic_kind; + template<typename Iter, typename Sentinel> + std::ostream & + write_formatted_message(std::ostream &, std::wstring_view, Iter, Iter, + Sentinel, std::string_view, int64_t = 80, + int64_t = 40); + template<typename Iter, typename Sentinel, + template< class > class Exception> + std::ostream & + write_formatted_expectation_failure_error_message(std::ostream &, + std::wstring_view, + Iter, Sentinel, + Exception< Iter > const &, + int64_t = 80, + int64_t = 40); + } +}+
namespace boost { + namespace parser { + struct callback_error_handler; + struct rethrow_error_handler; + struct vs_output_error_handler; + template<typename Iter, typename Sentinel> + std::ostream & + write_formatted_message(std::ostream &, std::string_view, Iter, Iter, + Sentinel, std::string_view, int64_t = 80, + int64_t = 40); + template<typename Iter, typename Sentinel, + template< class > class Exception> + std::ostream & + write_formatted_expectation_failure_error_message(std::ostream &, + std::string_view, + Iter, Sentinel, + Exception< Iter > const &, + int64_t = 80, + int64_t = 40); + template<typename I, typename S> auto normalize_iterators(I, I, S); + template<typename I, typename S> + auto normalize_iterators(I, parse_error< I >, S); + template<typename I, typename S> + auto normalize_iterators(I, lex_error< I >, S); + template<typename Iter> + line_position< Iter > find_line_position(Iter, Iter); + template<typename Iter, typename Sentinel> + Iter find_line_end(Iter, Sentinel); + } +}+
namespace boost { + namespace parser { + template<typename CharType, typename ID, + ctll::fixed_string WsStr = "\\s+", + ctll::fixed_string RegexStr = "", + unspecified IDs = detail::nttp_array<-1>{}, + unspecified Specs = detail::nttp_array<detail::parse_spec{}>{}> + struct lexer_t; + template<typename CharType> struct token; + template<ctll::fixed_string Regex, auto ID, typename ValueType, int Base> + struct token_spec_t; + template<std::ranges::contiguous_range V, typename Lexer, + typename TokenCache> + struct tokens_view; + + constexpr ctll::fixed_string no_ws; + constexpr auto token_chars; + constexpr auto lexer; + unspecified to_tokens; + template<typename R, typename Lexer> tokens_view(R &&, Lexer); + template<typename R, typename Lexer, typename TokenCache> + tokens_view(R &&, Lexer, std::reference_wrapper< TokenCache >); + } +}+
namespace boost { + namespace parser { + template<typename Parser, typename Action> struct action_parser; + template<typename Attribute> struct attr_parser; + template<typename TagType, typename Attribute = no_attribute, + typename LocalState = no_local_state, + typename ParamsTuple = no_params> + struct callback_rule; + template<typename Expected, typename AttributeType = void> + struct char_parser; + template<typename Tag> struct char_set_parser; + template<typename Tag> struct char_subrange_parser; + template<typename Predicate> struct eps_parser; + template<typename Parser, bool FailOnMatch> struct expect_parser; + template<typename T> struct float_parser; + template<typename T, int Radix = 10, int MinDigits = 1, + int MaxDigits = -1, typename Expected = detail::nope> + struct int_parser; + template<typename Parser> struct lexeme_parser; + template<typename Parser> struct no_case_parser; + template<typename Parser> struct omit_parser; + template<typename Parser> struct raw_parser; + template<typename Parser, typename DelimiterParser = detail::nope, + typename MinType = int64_t, typename MaxType = int64_t> + struct repeat_parser; + template<typename TagType, typename Attribute = no_attribute, + typename LocalState = no_local_state, + typename ParamsTuple = no_params> + struct rule; + template<bool CanUseCallbacks, typename TagType, typename Attribute, + typename LocalState, typename ParamsTuple> + struct rule_parser; + template<typename ParserTuple, typename BacktrackingTuple, + typename CombiningGroups> + struct seq_parser; + template<typename Parser, typename SkipParser = detail::nope> + struct skip_parser; + struct sorted_t; + template<typename StrIter, typename StrSentinel> struct string_parser; + template<typename Parser> struct string_view_parser; + struct string_view_tag; + template<typename SwitchValue, typename OrParser = detail::nope> + struct switch_parser; + template<typename T> struct symbol_parser; + template<typename TokenSpec, typename Expected> 
struct token_parser; + template<typename Parser, typename F> struct transform_parser; + template<typename T, int Radix = 10, int MinDigits = 1, + int MaxDigits = -1, typename Expected = detail::nope> + struct uint_parser; + template<bool NewlinesOnly, bool NoNewlines> struct ws_parser; + typedef unspecified null_sentinel_t; + typedef typename attribute< R, Parser >::type attribute_t; + typedef unspecified no_attribute; + typedef unspecified no_local_state; + typedef unspecified no_params; + + constexpr bool enable_optional; + constexpr bool enable_variant; + constexpr int ws_id; + constexpr int character_id; + constexpr bool is_token_v; + constexpr sorted_t sorted; + template<typename CharT> constexpr auto null_term(CharT *); + decltype(auto) _val(Context const &); + template<typename Context> decltype(auto) _attr(Context const &); + template<typename Context> decltype(auto) _where(Context const &); + template<typename Context> decltype(auto) _begin(Context const &); + template<typename Context> decltype(auto) _end(Context const &); + template<typename Context> decltype(auto) _pass(Context const &); + template<typename Context> decltype(auto) _locals(Context const &); + template<typename Context> decltype(auto) _params(Context const &); + template<typename Context> decltype(auto) _globals(Context const &); + template<typename Context> decltype(auto) _error_handler(Context const &); + template<std::forward_iterator I, typename Context> + void _report_error(Context const &, std::string_view, I); + template<typename Context> + void _report_error(Context const &, std::string_view); + template<std::forward_iterator I, typename Context> + void _report_warning(Context const &, std::string_view, I); + template<typename Context> + void _report_warning(Context const &, std::string_view); + } +}+
+ +BOOST_PARSER_DEFINE_RULES(...)+
namespace boost { + namespace parser { + template<typename R, typename Parser> struct attribute; + template<typename Parser, typename DelimiterParser> + struct delimited_seq_parser; + template<template< class > class Parser> struct directive; + template<typename Predicate> struct if_directive; + struct merge_directive; + struct none; + template<typename Parser> struct one_plus_parser; + template<typename Parser> struct opt_parser; + template<typename ParserTuple> struct or_parser; + template<typename Parser, typename GlobalState, typename ErrorHandler> + struct parser_interface; + template<typename ParserTuple> struct perm_parser; + template<typename Quotes, typename Escapes> struct quoted_string_parser; + template<typename MinType, typename MaxType> struct repeat_directive; + struct separate_directive; + template<typename SkipParser = detail::nope> struct skip_directive; + template<typename T> struct symbols; + template<typename F> struct transform_directive; + template<typename Parser> struct zero_plus_parser; + + enum trace; + + unspecified _p; + int64_t const Inf; + constexpr directive< omit_parser > omit; + constexpr directive< raw_parser > raw; + constexpr directive< string_view_parser > string_view; + constexpr directive< lexeme_parser > lexeme; + constexpr directive< no_case_parser > no_case; + constexpr skip_directive skip; + constexpr merge_directive merge; + constexpr separate_directive separate; + unspecified eps; + constexpr parser_interface< eoi_parser > eoi; + unspecified char_; + unspecified cp; + unspecified cu; + constexpr parser_interface< quoted_string_parser<> > quoted_string; + constexpr parser_interface< ws_parser< true, false > > eol; + constexpr parser_interface< ws_parser< false, false > > ws; + constexpr parser_interface< ws_parser< false, true > > blank; + constexpr parser_interface< digit_parser > digit; + unspecified hex_digit; + unspecified control; + unspecified punct; + unspecified lower; + unspecified upper; + constexpr 
parser_interface< bool_parser > bool_; + constexpr parser_interface< uint_parser< unsigned int, 2 > > bin; + constexpr parser_interface< uint_parser< unsigned int, 8 > > oct; + constexpr parser_interface< uint_parser< unsigned int, 16 > > hex; + constexpr parser_interface< uint_parser< unsigned short > > ushort_; + constexpr parser_interface< uint_parser< unsigned int > > uint_; + constexpr parser_interface< uint_parser< unsigned long > > ulong_; + constexpr parser_interface< uint_parser< unsigned long long > > ulong_long; + constexpr parser_interface< int_parser< short > > short_; + constexpr parser_interface< int_parser< int > > int_; + constexpr parser_interface< int_parser< long > > long_; + constexpr parser_interface< int_parser< long long > > long_long; + constexpr parser_interface< float_parser< float > > float_; + constexpr parser_interface< float_parser< double > > double_; + template<typename Parser, typename GlobalState, typename ErrorHandler> + auto with_globals(unspecified, GlobalState &); + template<typename Parser, typename GlobalState, typename ErrorHandler> + auto with_error_handler(parser_interface< Parser, GlobalState, default_error_handler > const &, + ErrorHandler &); + template<typename T> constexpr repeat_directive< T, T > repeat(T); + template<typename MinType, typename MaxType> + constexpr repeat_directive< MinType, MaxType > repeat(MinType, MaxType); + template<typename F> auto transform(F); + template<typename Attribute> constexpr auto attr(Attribute); + constexpr auto lit(char); + constexpr auto lit(char8_t); + constexpr auto lit(char32_t); + template<parsable_range_like R> constexpr auto string(R &&); + template<parsable_range_like R> constexpr auto lit(R &&); + template<typename Predicate> constexpr auto if_(Predicate); + template<typename T> constexpr auto switch_(T); + template<typename Parser> + constexpr auto operator>>(char, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator>>(char32_t, 
parser_interface< Parser >); + template<parsable_range_like R, typename Parser> + constexpr auto operator>>(R &&, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator>(char, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator>(char32_t, parser_interface< Parser >); + template<parsable_range_like R, typename Parser> + constexpr auto operator>(R &&, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator|(char, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator|(char32_t, parser_interface< Parser >); + template<parsable_range_like R, typename Parser> + constexpr auto operator|(R &&, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator-(char, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator-(char32_t, parser_interface< Parser >); + template<parsable_range_like R, typename Parser> + constexpr auto operator-(R &&, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator%(char, parser_interface< Parser >); + template<typename Parser> + constexpr auto operator%(char32_t, parser_interface< Parser >); + template<parsable_range_like R, typename Parser> + constexpr auto operator%(R &&, parser_interface< Parser >); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, typename Attr> + bool prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + Attr &, trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename Attr> + bool parse(R const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + Attr &, trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto 
prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler> + auto parse(R const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, + typename SkipParser, typename Attr> + bool prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, Attr &, + trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser, typename Attr> + bool parse(R const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, Attr &, + trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, + typename SkipParser> + auto prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, + trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + auto parse(R const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, + typename Callbacks> + bool callback_prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + Callbacks const &, trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename Callbacks> + bool callback_parse(R 
const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + Callbacks const &, trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler, + typename SkipParser, typename Callbacks> + bool callback_prefix_parse(I &, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, + Callbacks const &, trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser, typename Callbacks> + bool callback_parse(R const &, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, + Callbacks const &, trace = trace::off); + namespace literals { + constexpr auto operator""_l(char); + constexpr auto operator""_l(char8_t); + constexpr auto operator""_l(char32_t); + constexpr auto operator""_l(char const *, std::size_t); + constexpr auto operator""_l(char8_t const *, std::size_t); + constexpr auto operator""_l(char32_t const *, std::size_t); + constexpr auto operator""_p(char); + constexpr auto operator""_p(char8_t); + constexpr auto operator""_p(char32_t); + constexpr auto operator""_p(char const *, std::size_t); + constexpr auto operator""_p(char8_t const *, std::size_t); + constexpr auto operator""_p(char32_t const *, std::size_t); + } + } +}+
namespace boost { + namespace parser { + template<std::ranges::viewable_range V, + std::ranges::viewable_range ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> + struct replace_view; + + unspecified replace; + template<typename V, typename ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> + replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, ReplacementV &&, trace); + template<typename V, typename ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> + replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, ReplacementV &&); + template<typename V, typename ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler> + replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + ReplacementV &&, trace); + template<typename V, typename ReplacementV, typename Parser, + typename GlobalState, typename ErrorHandler> + replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + ReplacementV &&); + } +}+
namespace boost { + namespace parser { + template<std::ranges::viewable_range V, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> + struct search_all_view; + + unspecified search_all; + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + auto search(R &&, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename SkipParser, typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto search(I, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + parser_interface< SkipParser > const &, trace = trace::off); + template<parsable_range R, typename Parser, typename GlobalState, + typename ErrorHandler> + auto search(R &&, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + template<parsable_iter I, std::sentinel_for< I > S, typename Parser, + typename GlobalState, + error_handler< I, S, GlobalState > ErrorHandler> + auto search(I, S, + parser_interface< Parser, GlobalState, ErrorHandler > const &, + trace = trace::off); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + search_all_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, trace); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + search_all_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler> + search_all_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + trace); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler> + search_all_view(V &&, + 
parser_interface< Parser, GlobalState, ErrorHandler >); + } +}+
namespace boost { + namespace parser { + template<std::ranges::viewable_range V, typename Parser, + typename GlobalState, typename ErrorHandler, typename SkipParser> + struct split_view; + + unspecified split; + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + split_view(V &&, parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, trace); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + split_view(V &&, parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler> + split_view(V &&, parser_interface< Parser, GlobalState, ErrorHandler >, + trace); + template<typename V, typename Parser, typename GlobalState, + typename ErrorHandler> + split_view(V &&, parser_interface< Parser, GlobalState, ErrorHandler >); + } +}+
namespace boost { + namespace parser { + template<std::forward_iterator I, std::sentinel_for< I > S = I> + struct subrange; + template<std::forward_iterator I, std::sentinel_for< I > S = I> + constexpr subrange< I, S > make_subrange(I, S); + } +}namespace std { + namespace ranges { + } +}+
namespace boost { + namespace parser { + template<unspecified V> class utf16_view; + template<unspecified V> class utf32_view; + template<unspecified V> class utf8_view; + + typedef unspecified format; + + constexpr auto as_utf8; + constexpr auto as_utf16; + constexpr auto as_utf32; + } +}+
namespace boost { + namespace parser { + template<std::ranges::viewable_range V, std::move_constructible F, + typename Parser, typename GlobalState, typename ErrorHandler, + typename SkipParser> + struct transform_replace_view; + + unspecified transform_replace; + template<typename V, typename F, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + transform_replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, F &&, trace); + template<typename V, typename F, typename Parser, typename GlobalState, + typename ErrorHandler, typename SkipParser> + transform_replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + parser_interface< SkipParser >, F &&); + template<typename V, typename F, typename Parser, typename GlobalState, + typename ErrorHandler> + transform_replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + F &&, trace); + template<typename V, typename F, typename Parser, typename GlobalState, + typename ErrorHandler> + transform_replace_view(V &&, + parser_interface< Parser, GlobalState, ErrorHandler >, + F &&); + } +}+
namespace boost { + namespace parser { + typedef hana::tuple< Args... > tuple; + typedef hana::integral_constant< T, I > integral_constant; + typedef integral_constant< long long, I > llong; + template<typename T, typename U, U I> + constexpr decltype(auto) get(T &&, integral_constant< U, I >); + namespace literals { + template<char... chars> constexpr auto operator""_c(); + } + } +}+
Copyright © 2020 T. Zachary Laine
+ Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +
+Table of Contents
+structs and classesparse() API+ Boost.Parser requires C++17 or later. It is known to work with these compilers: +
+
+ C++20 concept-based constraints on template parameters are used if you build
+ in C++20 (or later) mode, and the compiler defines __cpp_lib_concepts.
+ In C++17 mode, most templates are simply unconstrained. If you want to disable
+ the use of concepts entirely, you can define BOOST_PARSER_DISABLE_CONCEPTS.
+