From f298bfe59b66ad7e904c5a7488a54ce69d7453ba Mon Sep 17 00:00:00 2001 From: Zach Laine Date: Wed, 13 Nov 2024 01:14:24 -0600 Subject: [PATCH] Remove the support for token_view::iterator -> underlying iterator translation in *parse_impl(); add support to the error handlers and their support functions directly instead. There are simply too many APIs there that need the translation to leave it to other code. Add lex_error exception type, and add support for all the APIs that used to take a parse_error param to now take either a parse_error or a lex_error. Throw lex_error from failed parsing of lexed tokens in detail::make_token(). See #202. --- example/user_error_handler.cpp | 4 +- include/boost/parser/error_handling.hpp | 101 +++++++++++++++----- include/boost/parser/error_handling_fwd.hpp | 73 +++++++++++--- include/boost/parser/lexer.hpp | 47 ++++++--- include/boost/parser/parser.hpp | 50 +++++----- test/lexer.cpp | 71 ++++++++++++++ 6 files changed, 266 insertions(+), 80 deletions(-) diff --git a/example/user_error_handler.cpp b/example/user_error_handler.cpp index d8647400..cc68cad0 100644 --- a/example/user_error_handler.cpp +++ b/example/user_error_handler.cpp @@ -31,9 +31,9 @@ struct logging_error_handler // and rethrow. Returning fail fails the top-level parse; returning // rethrow just re-throws the parse_error exception that got us here in // the first place. 
- template + template class Exception> bp::error_handler_result - operator()(Iter first, Sentinel last, bp::parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { bp::write_formatted_expectation_failure_error_message( ofs_, filename_, first, last, e); diff --git a/include/boost/parser/error_handling.hpp b/include/boost/parser/error_handling.hpp index 3215c4c9..a303a5bd 100644 --- a/include/boost/parser/error_handling.hpp +++ b/include/boost/parser/error_handling.hpp @@ -73,13 +73,16 @@ namespace boost { namespace parser { std::ostream & write_formatted_message( std::ostream & os, std::string_view filename, - Iter first, - Iter it, - Sentinel last, + Iter first_, + Iter it_, + Sentinel last_, std::string_view message, int64_t preferred_max_line_length, int64_t max_after_caret) { + auto [first, it, last] = + parser::normalize_iterators(first_, it_, last_); + if (!filename.empty()) os << filename << ':'; auto const position = parser::find_line_position(first, it); @@ -118,13 +121,15 @@ namespace boost { namespace parser { std::ostream & write_formatted_message( std::ostream & os, std::wstring_view filename, - Iter first, - Iter it, - Sentinel last, + Iter first_, + Iter it_, + Sentinel last_, std::string_view message, int64_t preferred_max_line_length, int64_t max_after_caret) { + auto [first, it, last] = + parser::normalize_iterators(first_, it_, last_); auto const r = filename | parser::detail::text::as_utf8; std::string s(r.begin(), r.end()); return parser::write_formatted_message( @@ -139,23 +144,26 @@ namespace boost { namespace parser { } #endif - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::string_view filename, - Iter first, - Sentinel last, - parse_error const & e, + Iter first_, + Sentinel last_, + Exception const & e, int64_t preferred_max_line_length, int64_t max_after_caret) { std::string message = "error: Expected "; message += 
e.what(); + // TODO: Document that this gracefully handles token iterators, and + // document the other parts of the API that do or do not. + auto [first, it, last] = parser::normalize_iterators(first_, e, last_); return parser::write_formatted_message( os, filename, first, - e.iter, + it, last, message, preferred_max_line_length, @@ -163,13 +171,13 @@ namespace boost { namespace parser { } #if defined(_MSC_VER) - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::wstring_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length, int64_t max_after_caret) { @@ -180,6 +188,41 @@ namespace boost { namespace parser { } #endif + namespace detail { + template + auto normalize_iterators_impl(I first, I it, S last) + { + if constexpr (detail::is_token_iter_v) { + auto const underlying_first = it.range_begin(); + auto const underlying_it = + underlying_first + (*it).underlying_position(); + auto const underlying_last = it.range_end(); + return std::tuple( + underlying_first, underlying_it, underlying_last); + } else { + return std::tuple(first, it, last); + } + } + } + + template + auto normalize_iterators(I first, I it, S last) + { + return detail::normalize_iterators_impl(first, it, last); + } + + template + auto normalize_iterators(I first, parse_error e, S last) + { + return detail::normalize_iterators_impl(first, e.iter, last); + } + + template + auto normalize_iterators(I first, lex_error e, S last) + { + return detail::normalize_iterators_impl(first, e.iter, last); + } + /** An error handler that allows users to supply callbacks to handle the reporting of warnings and errors. 
The reporting of errors and/or warnings can be suppressed by supplying one or both @@ -211,9 +254,13 @@ namespace boost { namespace parser { filename_.assign(r.begin(), r.end()); } #endif - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { if (error_) { std::stringstream ss; @@ -224,6 +271,10 @@ namespace boost { namespace parser { return error_handler_result::fail; } + // TODO: Add term 'token parsing' to glossary at start of docs. + + // TODO: Add a test that exercises this function when doing token + // parsing. template void diagnose( diagnostic_kind kind, @@ -260,13 +311,15 @@ namespace boost { namespace parser { std::string filename_; }; - /** An error handler that just re-throws any exception generated by the - parse. */ struct rethrow_error_handler { - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const + operator()(Iter first, Sentinel last, Exception const & e) const { return error_handler_result::rethrow; } @@ -288,8 +341,6 @@ namespace boost { namespace parser { }; #if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN) - /** An error handler that prints to the Visual Studio debugger via calls - to `OutputDebugString()`. 
*/ struct vs_output_error_handler : stream_error_handler { vs_output_error_handler() : @@ -309,9 +360,9 @@ namespace boost { namespace parser { // implementations - template + template class Exception> error_handler_result default_error_handler::operator()( - Iter first, Sentinel last, parse_error const & e) const + Iter first, Sentinel last, Exception const & e) const { parser::write_formatted_expectation_failure_error_message( std::cerr, "", first, last, e); @@ -343,9 +394,9 @@ namespace boost { namespace parser { diagnose(kind, message, context, parser::_where(context).begin()); } - template + template class Exception> error_handler_result stream_error_handler::operator()( - Iter first, Sentinel last, parse_error const & e) const + Iter first, Sentinel last, Exception const & e) const { std::ostream * os = err_os_; if (!os) diff --git a/include/boost/parser/error_handling_fwd.hpp b/include/boost/parser/error_handling_fwd.hpp index dfb6544f..2676c52e 100644 --- a/include/boost/parser/error_handling_fwd.hpp +++ b/include/boost/parser/error_handling_fwd.hpp @@ -34,6 +34,22 @@ namespace boost { namespace parser { Iter iter; }; + /** The exception thrown when a lexing error is encountered, consisting of + an iterator to the point of failure, and a description of the value + expected at the point of failure in `what()`. */ + template + struct lex_error : std::runtime_error + { + lex_error(Iter it, std::string msg) : + runtime_error(""), message(msg), iter(it) + {} + + char const * what() const noexcept override { return message.c_str(); } + + std::string message; + Iter iter; + }; + /** A position within a line, consisting of an iterator to the start of the line, the line number, and the column number. */ template @@ -74,13 +90,13 @@ namespace boost { namespace parser { /** Writes a formatted parse-expectation failure (meaning prefixed with the file name, line, and column number) to `os`. 
*/ - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::string_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length = 80, int64_t max_after_caret = 40); @@ -88,17 +104,32 @@ namespace boost { namespace parser { /** Writes a formatted parse-expectation failure (meaning prefixed with the file name, line, and column number) to `os`. This overload is Windows-only. */ - template + template class Exception> std::ostream & write_formatted_expectation_failure_error_message( std::ostream & os, std::wstring_view filename, Iter first, Sentinel last, - parse_error const & e, + Exception const & e, int64_t preferred_max_line_length = 80, int64_t max_after_caret = 40); #endif + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, I curr, S last); + + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, parse_error e, S last); + + /** TODO: Document that users may need to use this if they make their own + error handlers and do token parsing. */ + template + auto normalize_iterators(I first, lex_error e, S last); + /** The kinds of diagnostics that can be handled by an error handler. */ enum class diagnostic_kind { error, /// An error diagnostic. @@ -112,12 +143,16 @@ namespace boost { namespace parser { { constexpr default_error_handler() = default; - /** Handles a `parse_error` exception thrown during parsing. A - formatted parse-expectation failure is printed to `std::cerr`. - Always returns `error_handler_result::fail`. */ - template - error_handler_result operator()( - Iter first, Sentinel last, parse_error const & e) const; + /** Handles a `parse_error` or `lex_error` exception thrown during + parsing/lexing. 
A formatted parse-expectation failure is printed + to `std::cerr`. Always returns `error_handler_result::fail`. */ + template< + typename Iter, + typename Sentinel, + template + class Exception> + error_handler_result + operator()(Iter first, Sentinel last, Exception const & e) const; /** Prints `message` to `std::cerr`. The diagnostic is printed with the given `kind`, indicating the location as being at `it`. This @@ -194,9 +229,13 @@ namespace boost { namespace parser { formatted parse-expectation failure is printed to `*err_os_` when `err_os_` is non-null, or `std::cerr` otherwise. Always returns `error_handler_result::fail`. */ - template + template< + typename Iter, + typename Sentinel, + template + class Exception> error_handler_result - operator()(Iter first, Sentinel last, parse_error const & e) const; + operator()(Iter first, Sentinel last, Exception const & e) const; /** Let `std::ostream * s = kind == diagnostic_kind::error : err_os_ : warn_os_`; prints `message` to `*s` when `s` is non-null, or @@ -228,6 +267,16 @@ namespace boost { namespace parser { std::ostream * warn_os_; }; + /** An error handler that just re-throws any exception generated by the + parse. */ + struct rethrow_error_handler; + +#if defined(_MSC_VER) || defined(BOOST_PARSER_DOXYGEN) + /** An error handler that prints to the Visual Studio debugger via calls + to `OutputDebugString()`. 
*/ + struct vs_output_error_handler; +#endif + }} #endif diff --git a/include/boost/parser/lexer.hpp b/include/boost/parser/lexer.hpp index 64f9b0da..d0b0fc0a 100644 --- a/include/boost/parser/lexer.hpp +++ b/include/boost/parser/lexer.hpp @@ -26,9 +26,11 @@ #include +#include #if defined(BOOST_PARSER_TESTING) #include #endif +#include #include #include #include @@ -549,19 +551,33 @@ namespace boost { namespace parser { using type = T; }; - template + template token make_token( int id, std::basic_string_view ctre_token, - BOOST_PARSER_TOKEN_POSITION_TYPE underlying_position) + BOOST_PARSER_TOKEN_POSITION_TYPE underlying_position, + TokenIter it) { auto f = ctre_token.data(); auto const l = f + ctre_token.size(); // radix==0 indicates a real number was parsed. - auto report_error = [](auto type, int radix, bool success) { - if (!success) - ; // TODO: report error. + auto report_error = [it](auto type, int radix, bool success) { + if (!success) { + using unwrapped_type = typename decltype(type)::type; + std::ostringstream oss; + auto const bytes = sizeof(unwrapped_type); + oss << (bytes * CHAR_BIT) << "-bit"; + if (!radix) { + oss << " floating-point number"; + } else { + if (radix != 10) + oss << ", base-" << radix; + oss << (std::is_signed_v ? " " : " un"); + oss << "signed integer"; + } + throw lex_error(it, oss.str()); + } }; switch (Spec.type) { @@ -581,7 +597,7 @@ namespace boost { namespace parser { } else if (std::ranges::equal(ctre_token, "false"sv)) { return {id, underlying_position, 0ll}; } else { - // TODO: report error. 
+ throw lex_error(it, "'true' or 'false'"); } case token_parsed_type::signed_char: { @@ -845,6 +861,10 @@ namespace boost { namespace parser { static constexpr size_t initial_tokens_cache_size = 64; + iterator(Parent * parent, size_t token_offset) : + parent_(parent), token_offset_(token_offset) + {} + void fill_cache() { using string_view = typename Lexer::string_view; @@ -878,23 +898,24 @@ namespace boost { namespace parser { ++i; detail::hl::fold_n( - string_view{}, [&](auto state, auto i) { - if constexpr (!i.value) { + string_view{}, [&](auto state, auto ci) { + if constexpr (!ci.value) { return state; } - if (parse_results.template get()) { + if (parse_results.template get()) { string_view const sv = - parse_results.template get(); - int const id = parent_->lexer_.ids()[i.value]; + parse_results.template get(); + int const id = parent_->lexer_.ids()[ci.value]; constexpr detail::parse_spec parse_spec = - parent_->lexer_.specs()[i.value]; + parent_->lexer_.specs()[ci.value]; parent_->tokens_.push_back( detail::make_token( id, sv, (ctre_first.current - ctre_first.orig_begin) - - sv.size())); + sv.size(), + iterator(parent_, i))); return sv; } else { return state; diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index 08f38b54..14c7a758 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -2330,29 +2329,6 @@ namespace boost { namespace parser { template constexpr bool is_token_iter_v = is_token_v>; - template - bool handle_parse_exception( - ErrorHandler const & error_handler, - Iter initial_first, - Sentinel last, - parse_error const & e) - { - if constexpr (is_token_iter_v) { - auto const underlying_first = e.iter.range_begin(); - auto const underlying_last = e.iter.range_end(); - parse_error underlying_error( - underlying_first + (*e.iter).underlying_position(), - e.message); - return error_handler( - underlying_first, - 
underlying_last, - underlying_error) == error_handler_result::rethrow; - } else { - return error_handler(initial_first, last, e) == - error_handler_result::rethrow; - } - } - template< bool Debug, typename Attr, @@ -2411,8 +2387,19 @@ namespace boost { namespace parser { return detail::make_parse_result(attr_, success); } } catch (parse_error const & e) { - if (detail::handle_parse_exception( - error_handler, initial_first, last, e)) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { + throw; + } + if constexpr (std::is_reference_v) { + return false; + } else { + attr_t attr_{}; + return detail::make_parse_result(attr_, false); + } + } catch (lex_error const & e) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { throw; } if constexpr (std::is_reference_v) { @@ -2470,8 +2457,14 @@ namespace boost { namespace parser { detail::final_trace(context, flags, nope{}); return success; } catch (parse_error const & e) { - if (detail::handle_parse_exception( - error_handler, initial_first, last, e)) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { + throw; + } + return false; + } catch (lex_error const & e) { + if (error_handler(initial_first, last, e) == + error_handler_result::rethrow) { throw; } return false; @@ -8497,6 +8490,7 @@ namespace boost { namespace parser { #include #endif #include +#include namespace boost { namespace parser { diff --git a/test/lexer.cpp b/test/lexer.cpp index 077161cb..7da83e8c 100644 --- a/test/lexer.cpp +++ b/test/lexer.cpp @@ -496,6 +496,77 @@ int main() BOOST_TEST(position == (int)std::size(expected)); } + // lexing errors + { + // TODO: Document that a lexing error is a programming error, not an + // input error. 
+ using namespace std::literals; + + auto const lexer = bp::lexer | + bp::token_spec<"foo", 0, float> | + bp::token_spec<"bar", 1, int> | + bp::token_spec<"baz", 2, unsigned short> | + bp::token_spec<"quux", 3, int, 8> | + bp::token_spec<"next", 4, unsigned long long, 16>; + + bool caught_exception = false; + + caught_exception = false; + try { + for (auto tok : "foo" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit floating-point number"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "bar" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit signed integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "baz" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "16-bit unsigned integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "quux" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "32-bit, base-8 signed integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + + caught_exception = false; + try { + for (auto tok : "next" | bp::to_tokens(lexer)) { + (void)tok; + } + } catch (std::exception const & e) { + BOOST_TEST(e.what() == "64-bit, base-16 unsigned integer"sv); + caught_exception = true; + } + BOOST_TEST(caught_exception); + } + // TODO: Document the limitation of CTRE that the input must be a // continguous_range, so that string_views can be formed.