diff --git a/example/user_error_handler.cpp b/example/user_error_handler.cpp index d8647400..cc68cad0 100644 --- a/example/user_error_handler.cpp +++ b/example/user_error_handler.cpp @@ -31,9 +31,9 @@ struct logging_error_handler // and rethrow. Returning fail fails the top-level parse; returning // rethrow just re-throws the parse_error exception that got us here in // the first place. - template + template class Exception> bp::error_handler_result - operator()(Iter first, Sentinel last, bp::parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { bp::write_formatted_expectation_failure_error_message( ofs_, filename_, first, last, e); diff --git a/include/boost/parser/error_handling.hpp b/include/boost/parser/error_handling.hpp index 3215c4c9..a303a5bd 100644 --- a/include/boost/parser/error_handling.hpp +++ b/include/boost/parser/error_handling.hpp @@ -73,13 +73,16 @@ namespace boost { namespace parser { std::ostream & write_formatted_message( std::ostream & os, std::string_view filename, - Iter first, - Iter it, - Sentinel last, + Iter first_, + Iter it_, + Sentinel last_, std::string_view message, int64_t preferred_max_line_length, int64_t max_after_caret) { + auto [first, it, last] = + parser::normalize_iterators(first_, it_, last_); + if (!filename.empty()) os << filename << ':'; auto const position = parser::find_line_position(first, it); @@ -118,13 +121,15 @@ namespace boost { namespace parser { std::ostream & write_formatted_message( std::ostream & os, std::wstring_view filename, - Iter first, - Iter it, - Sentinel last, + Iter first_, + Iter it_, + Sentinel last_, std::string_view message, int64_t preferred_max_line_length, int64_t max_after_caret) { + auto [first, it, last] = + parser::normalize_iterators(first_, it_, last_); auto const r = filename | parser::detail::text::as_utf8; std::string s(r.begin(), r.end()); return parser::write_formatted_message( @@ -139,23 +144,26 @@ namespace boost { namespace parser { 
} #endif - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::string_view filename, - Iter first, - Sentinel last, - parse_error const & e, + Iter first_, + Sentinel last_, + Exception const & e, int64_t preferred_max_line_length, int64_t max_after_caret) { std::string message = "error: Expected "; message += e.what(); + // TODO: Document that this gracefully handles token iterators, and + // document the other parts of the API that do or do not. + auto [first, it, last] = parser::normalize_iterators(first_, e, last_); return parser::write_formatted_message( os, filename, first, - e.iter, + it, last, message, preferred_max_line_length, @@ -163,13 +171,13 @@ namespace boost { namespace parser { } #if defined(_MSC_VER) - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::wstring_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length, int64_t max_after_caret) { @@ -180,6 +188,41 @@ namespace boost { namespace parser { } #endif + namespace detail { + template + auto normalize_iterators_impl(I first, I it, S last) + { + if constexpr (detail::is_token_iter_v) { + auto const underlying_first = it.range_begin(); + auto const underlying_it = + underlying_first + (*it).underlying_position(); + auto const underlying_last = it.range_end(); + return std::tuple( + underlying_first, underlying_it, underlying_last); + } else { + return std::tuple(first, it, last); + } + } + } + + template + auto normalize_iterators(I first, I it, S last) + { + return detail::normalize_iterators_impl(first, it, last); + } + + template + auto normalize_iterators(I first, parse_error e, S last) + { + return detail::normalize_iterators_impl(first, e.iter, last); + } + + template + auto normalize_iterators(I first, lex_error e, S last) + { + return 
detail::normalize_iterators_impl(first, e.iter, last); + } + /** An error handler that allows users to supply callbacks to handle the reporting of warnings and errors. The reporting of errors and/or warnings can be suppressed by supplying one or both @@ -211,9 +254,13 @@ namespace boost { namespace parser { filename_.assign(r.begin(), r.end()); } #endif - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { if (error_) { std::stringstream ss; @@ -224,6 +271,10 @@ namespace boost { namespace parser { return error_handler_result::fail; } + // TODO: Add term 'token parsing' to glossary at start of docs. + + // TODO: Add a test that exercises this function when doing token + // parsing. template void diagnose( diagnostic_kind kind, @@ -260,13 +311,15 @@ namespace boost { namespace parser { std::string filename_; }; - /** An error handler that just re-throws any exception generated by the - parse. */ struct rethrow_error_handler { - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { return error_handler_result::rethrow; } @@ -288,8 +341,6 @@ namespace boost { namespace parser { }; #if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN) - /** An error handler that prints to the Visual Studio debugger via calls - to `OutputDebugString()`. 
*/ struct vs_output_error_handler : stream_error_handler { vs_output_error_handler() : @@ -309,9 +360,9 @@ namespace boost { namespace parser { // implementations - template + template class Exception> error_handler_result default_error_handler::operator()( - Iter first, Sentinel last, parse_error const & e) const + Iter first, Sentinel last, Exception const & e) const { parser::write_formatted_expectation_failure_error_message( std::cerr, "", first, last, e); @@ -343,9 +394,9 @@ namespace boost { namespace parser { diagnose(kind, message, context, parser::_where(context).begin()); } - template + template class Exception> error_handler_result stream_error_handler::operator()( - Iter first, Sentinel last, parse_error const & e) const + Iter first, Sentinel last, Exception const & e) const { std::ostream * os = err_os_; if (!os) diff --git a/include/boost/parser/error_handling_fwd.hpp b/include/boost/parser/error_handling_fwd.hpp index dfb6544f..2676c52e 100644 --- a/include/boost/parser/error_handling_fwd.hpp +++ b/include/boost/parser/error_handling_fwd.hpp @@ -34,6 +34,22 @@ namespace boost { namespace parser { Iter iter; }; + /** The exception thrown when a lexing error is encountered, consisting of + an iterator to the point of failure, and a description of the value + expected at the point of failure in `what()`. */ + template + struct lex_error : std::runtime_error + { + lex_error(Iter it, std::string msg) : + runtime_error(""), message(msg), iter(it) + {} + + char const * what() const noexcept override { return message.c_str(); } + + std::string message; + Iter iter; + }; + /** A position within a line, consisting of an iterator to the start of the line, the line number, and the column number. */ template @@ -74,13 +90,13 @@ namespace boost { namespace parser { /** Writes a formatted parse-expectation failure (meaning prefixed with the file name, line, and column number) to `os`. 
*/ - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::string_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length = 80, int64_t max_after_caret = 40); @@ -88,17 +104,32 @@ namespace boost { namespace parser { /** Writes a formatted parse-expectation failure (meaning prefixed with the file name, line, and column number) to `os`. This overload is Windows-only. */ - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::wstring_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length = 80, int64_t max_after_caret = 40); #endif + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, I curr, S last); + + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, parse_error e, S last); + + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, lex_error e, S last); + /** The kinds of diagnostics that can be handled by an error handler. */ enum class diagnostic_kind { error, /// An error diagnostic. @@ -112,12 +143,16 @@ namespace boost { namespace parser { { constexpr default_error_handler() = default; - /** Handles a `parse_error` exception thrown during parsing. A - formatted parse-expectation failure is printed to `std::cerr`. - Always returns `error_handler_result::fail`. */ - template - error_handler_result operator()( - Iter first, Sentinel last, parse_error const & e) const; + /** Handles a `parse_error` or `lex_error` exception thrown during + parsing/lexing. 
A formatted parse-expectation failure is printed + to `std::cerr`. Always returns `error_handler_result::fail`. */ + template< + typename Iter, + typename Sentinel, + template + class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception const & e) const; /** Prints `message` to `std::cerr`. The diagnostic is printed with the given `kind`, indicating the location as being at `it`. This @@ -194,9 +229,13 @@ namespace boost { namespace parser { formatted parse-expectation failure is printed to `*err_os_` when `err_os_` is non-null, or `std::cerr` otherwise. Always returns `error_handler_result::fail`. */ - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const; + operator()(Iter first, Sentinel last, Exception const & e) const; /** Let `std::ostream * s = kind == diagnostic_kind::error : err_os_ : warn_os_`; prints `message` to `*s` when `s` is non-null, or @@ -228,6 +267,16 @@ namespace boost { namespace parser { std::ostream * warn_os_; }; + /** An error handler that just re-throws any exception generated by the + parse. */ + struct rethrow_error_handler; + +#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN) + /** An error handler that prints to the Visual Studio debugger via calls + to `OutputDebugString()`. 
*/ + struct vs_output_error_handler; +#endif + }} #endif diff --git a/include/boost/parser/lexer.hpp b/include/boost/parser/lexer.hpp index 64f9b0da..d0b0fc0a 100644 --- a/include/boost/parser/lexer.hpp +++ b/include/boost/parser/lexer.hpp @@ -26,9 +26,11 @@ #include +#include #if defined(BOOST_PARSER_TESTING) #include #endif +#include #include #include #include @@ -549,19 +551,33 @@ namespace boost { namespace parser { using type = T; }; - template + template token make_token( int id, std::basic_string_view ctre_token, - BOOST_PARSER_TOKEN_POSITION_TYPE underlying_position) + BOOST_PARSER_TOKEN_POSITION_TYPE underlying_position, + TokenIter it) { auto f = ctre_token.data(); auto const l = f + ctre_token.size(); // radix==0 indicates a real number was parsed. - auto report_error = [](auto type, int radix, bool success) { - if (!success) - ; // TODO: report error. + auto report_error = [it](auto type, int radix, bool success) { + if (!success) { + using unwrapped_type = typename decltype(type)::type; + std::ostringstream oss; + auto const bytes = sizeof(unwrapped_type); + oss << (bytes * CHAR_BIT) << "-bit"; + if (!radix) { + oss << " floating-point number"; + } else { + if (radix != 10) + oss << ", base-" << radix; + oss << (std::is_signed_v ? " " : " un"); + oss << "signed integer"; + } + throw lex_error(it, oss.str()); + } }; switch (Spec.type) { @@ -581,7 +597,7 @@ namespace boost { namespace parser { } else if (std::ranges::equal(ctre_token, "false"sv)) { return {id, underlying_position, 0ll}; } else { - // TODO: report error. 
+ throw lex_error(it, "'true' or 'false'"); } case token_parsed_type::signed_char: { @@ -845,6 +861,10 @@ namespace boost { namespace parser { static constexpr size_t initial_tokens_cache_size = 64; + iterator(Parent * parent, size_t token_offset) : + parent_(parent), token_offset_(token_offset) + {} + void fill_cache() { using string_view = typename Lexer::string_view; @@ -878,23 +898,24 @@ namespace boost { namespace parser { ++i; detail::hl::fold_n( - string_view{}, [&](auto state, auto i) { - if constexpr (!i.value) { + string_view{}, [&](auto state, auto ci) { + if constexpr (!ci.value) { return state; } - if (parse_results.template get()) { + if (parse_results.template get()) { string_view const sv = - parse_results.template get(); - int const id = parent_->lexer_.ids()[i.value]; + parse_results.template get(); + int const id = parent_->lexer_.ids()[ci.value]; constexpr detail::parse_spec parse_spec = - parent_->lexer_.specs()[i.value]; + parent_->lexer_.specs()[ci.value]; parent_->tokens_.push_back( detail::make_token( id, sv, (ctre_first.current - ctre_first.orig_begin) - - sv.size())); + sv.size(), + iterator(parent_, i))); return sv; } else { return state; diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index 08f38b54..14c7a758 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -2330,29 +2329,6 @@ namespace boost { namespace parser { template constexpr bool is_token_iter_v = is_token_v>; - template - bool handle_parse_exception( - ErrorHandler const & error_handler, - Iter initial_first, - Sentinel last, - parse_error const & e) - { - if constexpr (is_token_iter_v) { - auto const underlying_first = e.iter.range_begin(); - auto const underlying_last = e.iter.range_end(); - parse_error underlying_error( - underlying_first + (*e.iter).underlying_position(), - e.message); - return error_handler( - underlying_first, - 
underlying_last, - underlying_error) == error_handler_result::rethrow; - } else { - return error_handler(initial_first, last, e) == - error_handler_result::rethrow; - } - } - template< bool Debug, typename Attr, @@ -2411,8 +2387,19 @@ namespace boost { namespace parser { return detail::make_parse_result(attr_, success); } } catch (parse_error const & e) { - if (detail::handle_parse_exception( - error_handler, initial_first, last, e)) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { + throw; + } + if constexpr (std::is_reference_v) { + return false; + } else { + attr_t attr_{}; + return detail::make_parse_result(attr_, false); + } + } catch (lex_error const & e) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { throw; } if constexpr (std::is_reference_v) { @@ -2470,8 +2457,14 @@ namespace boost { namespace parser { detail::final_trace(context, flags, nope{}); return success; } catch (parse_error const & e) { - if (detail::handle_parse_exception( - error_handler, initial_first, last, e)) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { + throw; + } + return false; + } catch (lex_error const & e) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { throw; } return false; @@ -8497,6 +8490,7 @@ namespace boost { namespace parser { #include #endif #include +#include namespace boost { namespace parser { diff --git a/test/lexer.cpp b/test/lexer.cpp index 077161cb..7da83e8c 100644 --- a/test/lexer.cpp +++ b/test/lexer.cpp @@ -496,6 +496,77 @@ int main() BOOST_TEST(position == (int)std::size(expected)); } + // lexing errors + { + // TODO: Document that a lexing error is a programming error, not an + // input error. 
+ using namespace std::literals; + + auto const lexer = bp::lexer | + bp::token_spec<"foo", 0, float> | + bp::token_spec<"bar", 1, int> | + bp::token_spec<"baz", 2, unsigned short> | + bp::token_spec<"quux", 3, int, 8> | + bp::token_spec<"next", 4, unsigned long long, 16>; + + bool caught_exception = false; + + caught_exception = false; + try { + for (auto tok : "foo" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit floating-point number"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "bar" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit signed integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "baz" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "16-bit unsigned integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "quux" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit, base-8 signed integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "next" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "64-bit, base-16 unsigned integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + } + // TODO: Document the limitation of CTRE that the input must be a // continguous_range, so that string_views can be formed.