/** * Copyright (C) 2024 T. Zachary Laine * * Distributed under the Boost Software License, Version 1.0. (See * accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) */ #define BOOST_PARSER_TESTING #include #include #include #include "ill_formed.hpp" #include #include #include namespace bp = boost::parser; enum class my_tokens { ws, foo, bar, baz }; int main() { // formation of token_specs { auto const token_spec = bp::token_spec<"foo", 12>; bp::token_spec_t<"foo", 12, bp::string_view_tag, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", my_tokens::foo>; bp::token_spec_t<"foo", my_tokens::foo, bp::string_view_tag, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"bar", my_tokens::bar>; bp::token_spec_t<"bar", my_tokens::bar, bp::string_view_tag, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12, int, 2>; bp::token_spec_t<"foo", 12, int, 2> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12>; bp::token_spec_t<"foo", 12, bp::string_view_tag, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12, unsigned int, 8>; bp::token_spec_t<"foo", 12, unsigned int, 8> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12, short>; bp::token_spec_t<"foo", 12, short, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12, float>; bp::token_spec_t<"foo", 12, float, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } { auto const token_spec = bp::token_spec<"foo", 12, double>; bp::token_spec_t<"foo", 12, double, 10> token_spec_explicit; static_assert(std::same_as< decltype(token_spec.parser_)::token_spec, decltype(token_spec_explicit)>); } // making lexers { auto const lexer = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz>; // +1 because of the 0-group static_assert(decltype(lexer)::size() == 3 + 1); static_assert(std::same_as); } { auto const lexer = bp::lexer | bp::token_chars<'='>; static_assert(decltype(lexer)::size() == 1 + 1); static_assert(std::same_as); } { auto const lexer = bp::lexer | bp::token_chars<'='> | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz>; static_assert(decltype(lexer)::size() == 4 + 1); static_assert(std::same_as); } { auto const lexer = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; static_assert(decltype(lexer)::size() == 4 + 1); static_assert(std::same_as); } { auto const lexer = bp::lexer | bp::token_chars< '=', '+', '-', '!', '?', ':', '.', ',', '(', ')', '[', ']', '{', '}', '@', ';'>; static_assert(decltype(lexer)::size() == 16 + 1); static_assert(std::same_as); } #if 0 // This is a test of whether the escapes work for every possible char // value accepted by detail::token_chars_spec. This takes a long time and // really only needs to happen once. { auto const lexer = bp::lexer | bp::token_chars< char(0), char(1), char(2), char(3), char(4), char(5), char(6), char(7), char(8), char(9), char(10), char(11), char(12), char(13), char(14), char(15), char(16), char(17), char(18), char(19), char(20), char(21), char(22), char(23), char(24), char(25), char(26), char(27), char(28), char(29), char(30), char(31), char(32), char(33), char(34), char(35), char(36), char(37), char(38), char(39), char(40), char(41), char(42), char(43), char(44), char(45), char(46), char(47), char(48), char(49), char(50), char(51), char(52), char(53), char(54), char(55), char(56), char(57), char(58), char(59), char(60), char(61), char(62), char(63), char(64), char(65), char(66), char(67), char(68), char(69), char(70), char(71), char(72), char(73), char(74), char(75), char(76), char(77), char(78), char(79), char(80), char(81), char(82), char(83), char(84), char(85), char(86), char(87), char(88), char(89), char(90), char(91), char(92), char(93), char(94), char(95), char(96), char(97), char(98), char(99), char(100), char(101), char(103), char(102), char(104), char(105), char(106), char(107), char(108), char(109), char(110), char(111), char(112), char(113), char(114), char(115), char(116), char(117), char(118), char(119), char(120), char(121), char(122), char(123), char(124), char(125), char(126), char(127)>; } #endif { // Mixed UTFs. auto const lexer = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec | bp::token_spec | bp::token_chars<'='>; // mutable vs. const token_views + mutable vs. const input views std::string input = "foo = bar"; auto mr_mi = input | bp::to_tokens(lexer); auto const cr_mi = input | bp::to_tokens(lexer); auto const const_input = input; auto mr_ci = input | bp::to_tokens(lexer); auto const cr_ci = input | bp::to_tokens(lexer); using tok_t = bp::token; tok_t const expected[] = { tok_t((int)my_tokens::foo, 0, "foo"), tok_t(bp::character_id, 0, (long long)'='), tok_t((int)my_tokens::bar, 0, "bar")}; int position = 0; position = 0; for (auto tok : mr_mi) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : cr_mi) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : mr_ci) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : cr_ci) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); } // Check basic plumbing of connecting UTF inputs to CTRE. { auto const lexer = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::string s = "foo = bar"; using tok_t = bp::token; tok_t const expected[] = { tok_t((int)my_tokens::foo, 0, "foo"), tok_t(bp::character_id, 0, (long long)'='), tok_t((int)my_tokens::bar, 0, "bar")}; auto const lexer8 = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::u8string u8s = u8"foo = bar"; using tok8_t = bp::token; tok8_t const expected8[] = { tok8_t((int)my_tokens::foo, 0, u8"foo"), tok8_t(bp::character_id, 0, (long long)'='), tok8_t((int)my_tokens::bar, 0, u8"bar")}; auto const lexer16 = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::u16string u16s = u"foo = bar"; using tok16_t = bp::token; tok16_t const expected16[] = { tok16_t((int)my_tokens::foo, 0, u"foo"), tok16_t(bp::character_id, 0, (long long)'='), tok16_t((int)my_tokens::bar, 0, u"bar")}; auto const lexer32 = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::u32string u32s = U"foo = bar"; using tok32_t = bp::token; tok32_t const expected32[] = { tok32_t((int)my_tokens::foo, 0, U"foo"), tok32_t(bp::character_id, 0, (long long)'='), tok32_t((int)my_tokens::bar, 0, U"bar")}; int position = 0; position = 0; for (auto tok : s | bp::to_tokens(lexer)) { BOOST_TEST(tok == expected[position]); static_assert( std:: same_as); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : u8s | bp::to_tokens(lexer8)) { BOOST_TEST(tok == expected8[position]); static_assert(std::same_as< decltype(tok.get_string_view()), std::u8string_view>); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : u16s | bp::to_tokens(lexer16)) { BOOST_TEST(tok == expected16[position]); static_assert(std::same_as< decltype(tok.get_string_view()), std::u16string_view>); ++position; } BOOST_TEST(position == (int)std::size(expected)); position = 0; for (auto tok : u32s | bp::to_tokens(lexer32)) { BOOST_TEST(tok == expected32[position]); static_assert(std::same_as< decltype(tok.get_string_view()), std::u32string_view>); ++position; } BOOST_TEST(position == (int)std::size(expected)); } // no-ws lexer { auto const lexer = bp::lexer | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::string s = "foo=bar"; using tok_t = bp::token; tok_t const expected[] = { tok_t((int)my_tokens::foo, 0, "foo"), tok_t(bp::character_id, 0, (long long)'='), tok_t((int)my_tokens::bar, 0, "bar")}; int position = 0; for (auto tok : s | bp::to_tokens(lexer)) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); } // ws-as-token lexers { auto const lexer = bp::lexer | bp::token_spec<"\\s+", my_tokens::ws> | bp::token_spec<"foo", my_tokens::foo> | bp::token_spec<"bar", my_tokens::bar> | bp::token_spec<"baz", my_tokens::baz> | bp::token_chars<'='>; std::string s = "foo = bar"; using tok_t = bp::token; tok_t const expected[] = { tok_t((int)my_tokens::foo, 0, "foo"), tok_t((int)my_tokens::ws, 0, " "), tok_t(bp::character_id, 0, (long long)'='), tok_t((int)my_tokens::ws, 0, " "), tok_t((int)my_tokens::bar, 0, "bar")}; int position = 0; for (auto tok : s | bp::to_tokens(lexer)) { BOOST_TEST(tok == expected[position]); ++position; } BOOST_TEST(position == (int)std::size(expected)); } // lexing errors { using namespace std::literals; auto const lexer = bp::lexer | bp::token_spec<"foo", 0, float> | bp::token_spec<"bar", 1, int> | bp::token_spec<"baz", 2, unsigned short> | bp::token_spec<"quux", 3, int, 8> | bp::token_spec<"next", 4, unsigned long long, 16>; bool caught_exception = false; caught_exception = false; try { for (auto tok : "foo" | bp::to_tokens(lexer)) { (void)tok; } } catch (std::exception const & e) { BOOST_TEST(e.what() == "32-bit floating-point number"sv); caught_exception = true; } BOOST_TEST(caught_exception); caught_exception = false; try { for (auto tok : "bar" | bp::to_tokens(lexer)) { (void)tok; } } catch (std::exception const & e) { BOOST_TEST(e.what() == "32-bit signed integer"sv); caught_exception = true; } BOOST_TEST(caught_exception); caught_exception = false; try { for (auto tok : "baz" | bp::to_tokens(lexer)) { (void)tok; } } catch (std::exception const & e) { BOOST_TEST(e.what() == "16-bit unsigned integer"sv); caught_exception = true; } BOOST_TEST(caught_exception); caught_exception = false; try { for (auto tok : "quux" | bp::to_tokens(lexer)) { (void)tok; } } catch (std::exception const & e) { BOOST_TEST(e.what() == "32-bit, base-8 signed integer"sv); caught_exception = true; } BOOST_TEST(caught_exception); caught_exception = false; try { for (auto tok : "next" | bp::to_tokens(lexer)) { (void)tok; } } catch (std::exception const & e) { BOOST_TEST(e.what() == "64-bit, base-16 unsigned integer"sv); caught_exception = true; } BOOST_TEST(caught_exception); } return boost::report_errors(); }