mirror of
https://github.com/boostorg/parser.git
synced 2026-01-19 04:22:13 +00:00
Address TODOs about resolving the value used in detail::token_with_value, and
doing transcoding for the match in detail::token_with_string_view. See #202.
This commit is contained in:
@@ -264,7 +264,8 @@ namespace boost { namespace parser {
|
||||
{
|
||||
os << "[value: ";
|
||||
if (token.has_string_view()) {
|
||||
os << '"' << token.get_string_view() << '"';
|
||||
os << '"' << (token.get_string_view() | detail::text::as_utf8)
|
||||
<< '"';
|
||||
} else if (token.has_long_long()) {
|
||||
if (token.id() == character_id) {
|
||||
os << "'" << (char)token.get_long_long() << "'";
|
||||
|
||||
@@ -7206,7 +7206,9 @@ namespace boost { namespace parser {
|
||||
return;
|
||||
}
|
||||
|
||||
auto const cps = make_subrange(expected_first_, expected_last_);
|
||||
using char_type = detail::char_type_from_iter<Iter, Sentinel>;
|
||||
auto const cps =
|
||||
make_subrange<char_type>(expected_first_, expected_last_);
|
||||
|
||||
if constexpr (detail::is_token_iter_v<Iter>) {
|
||||
if (!(*first).has_string_view()) {
|
||||
@@ -7215,7 +7217,7 @@ namespace boost { namespace parser {
|
||||
}
|
||||
|
||||
auto const sv = (*first).get_string_view();
|
||||
auto token_cps = make_subrange(sv.begin(), sv.end());
|
||||
auto token_cps = make_subrange<char_type>(sv.begin(), sv.end());
|
||||
auto const mismatch = detail::no_case_aware_string_mismatch(
|
||||
token_cps.begin(),
|
||||
token_cps.end(),
|
||||
@@ -7254,13 +7256,11 @@ namespace boost { namespace parser {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename I, typename S>
|
||||
template<typename CharType, typename I, typename S>
|
||||
static auto make_subrange(I f, S l)
|
||||
{
|
||||
auto subrange = BOOST_PARSER_SUBRANGE(f, l);
|
||||
if constexpr (std::is_same_v<
|
||||
detail::remove_cv_ref_t<decltype(*f)>,
|
||||
char>) {
|
||||
if constexpr (std::is_same_v<CharType, char>) {
|
||||
return subrange;
|
||||
} else {
|
||||
return subrange | detail::text::as_utf32;
|
||||
|
||||
@@ -38,33 +38,65 @@ namespace boost { namespace parser {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: This needs to use resolve(value_), and we need a test for
|
||||
// that.
|
||||
template<typename T>
|
||||
template<typename Expected>
|
||||
struct token_with_value
|
||||
{
|
||||
static_assert(std::integral<T> || std::floating_point<T>);
|
||||
explicit token_with_value(T value) : value_(value) {}
|
||||
bool matches(T value) const { return value == value_; }
|
||||
T value_;
|
||||
explicit constexpr token_with_value(Expected value) :
|
||||
expected_(value)
|
||||
{}
|
||||
|
||||
template<typename T, typename Context>
|
||||
bool matches(T value, Context const & context) const
|
||||
{
|
||||
return value == detail::resolve(context, expected_);
|
||||
}
|
||||
|
||||
Expected expected_;
|
||||
};
|
||||
|
||||
template<typename Subrange>
|
||||
struct token_with_string_view
|
||||
{
|
||||
explicit token_with_string_view(Subrange value) : value_(value) {}
|
||||
explicit constexpr token_with_string_view(Subrange subrange) :
|
||||
subrange_(subrange)
|
||||
{}
|
||||
|
||||
template<typename CharType>
|
||||
bool matches(std::basic_string_view<CharType> value) const
|
||||
template<typename CharType, typename Context>
|
||||
bool matches(
|
||||
std::basic_string_view<CharType> value, Context const &) const
|
||||
{
|
||||
// TODO: this is wrong. We need to transcode both sides to
|
||||
// UTF-32, when !same_as<CharType, char>. (Need to write some
|
||||
// tests, and evaluate whether this is a good idea. If not,
|
||||
// go change the docs on token_parser).
|
||||
return std::ranges::equal(value, value_);
|
||||
auto const value_cps =
|
||||
make_subrange<CharType>(value.begin(), value.end());
|
||||
auto const subrange_cps =
|
||||
make_subrange<CharType>(subrange_.begin(), subrange_.end());
|
||||
return std::ranges::equal(
|
||||
value_cps, subrange_cps, [](auto a, auto b) {
|
||||
return cast_char(a) == cast_char(b);
|
||||
});
|
||||
}
|
||||
|
||||
Subrange value_;
|
||||
// Normalizes a single code unit so that code units of different character
// types can be compared with `==`.
//
// A plain `char` is converted to `unsigned char`; every other type is
// returned unchanged.  The conversion matters for bytes >= 0x80: where
// `char` is signed, such a byte would otherwise be promoted to a negative
// int and never compare equal to the same byte held in an unsigned code
// unit type.
//
// `c` is the code unit to normalize; the result is `unsigned char` when T
// is `char`, and T otherwise.
template<typename T>
static constexpr auto cast_char(T c)
{
    if constexpr (std::is_same_v<T, char>) {
        return static_cast<unsigned char>(c);
    } else {
        return c;
    }
}
|
||||
|
||||
template<typename CharType, typename I, typename S>
|
||||
static auto make_subrange(I f, S l)
|
||||
{
|
||||
auto subrange = BOOST_PARSER_SUBRANGE(f, l);
|
||||
if constexpr (std::is_same_v<CharType, char>) {
|
||||
return subrange;
|
||||
} else {
|
||||
return subrange | detail::text::as_utf32;
|
||||
}
|
||||
}
|
||||
|
||||
Subrange subrange_;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -142,7 +174,7 @@ namespace boost { namespace parser {
|
||||
if (use_expected || detail::gen_attrs(flags)) {
|
||||
auto opt_attr = detail::token_as<attribute_type<Iter>>(x);
|
||||
if constexpr (use_expected) {
|
||||
if (!opt_attr || !expected_.matches(*opt_attr)) {
|
||||
if (!opt_attr || !expected_.matches(*opt_attr, context)) {
|
||||
success = false;
|
||||
return;
|
||||
}
|
||||
@@ -157,7 +189,7 @@ namespace boost { namespace parser {
|
||||
/** Returns a `parser_interface` containing a `token_parser` that
|
||||
matches `value`. */
|
||||
template<typename T>
|
||||
requires std::is_integral_v<T> || std::is_floating_point_v<T>
|
||||
requires(!parsable_range_like<T>)
|
||||
constexpr auto operator()(T value) const noexcept
|
||||
{
|
||||
BOOST_PARSER_ASSERT(
|
||||
@@ -191,12 +223,26 @@ namespace boost { namespace parser {
|
||||
"token_spec, like 'token_spec(char-set)(char-set)'. Quit "
|
||||
"it!'"));
|
||||
auto expected =
|
||||
detail::token_with_string_view{BOOST_PARSER_SUBRANGE(
|
||||
std::ranges::begin(r), std::ranges::end(r))};
|
||||
detail::token_with_string_view{make_expected_range((R &&)r)};
|
||||
return parser_interface(
|
||||
token_parser<token_spec, decltype(expected)>(expected));
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
static constexpr auto make_expected_range(R && r)
|
||||
{
|
||||
using T = detail::remove_cv_ref_t<R>;
|
||||
if constexpr (std::is_bounded_array_v<T>) {
|
||||
constexpr auto n = std::extent_v<T>;
|
||||
auto const offset = n && !r[n - 1] ? 1 : 0;
|
||||
return BOOST_PARSER_SUBRANGE(
|
||||
std::ranges::begin(r), std::ranges::end(r) - offset);
|
||||
} else {
|
||||
return BOOST_PARSER_SUBRANGE(
|
||||
std::ranges::begin(r), std::ranges::end(r));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Consider adding a special string_view-like type that can be
|
||||
// passed to the range overload above. It would be based on
|
||||
// adobe::name_t. When comparing it to a tokens' string_view, if it
|
||||
|
||||
@@ -13,10 +13,195 @@
|
||||
namespace bp = boost::parser;
|
||||
|
||||
constexpr auto true_false = bp::token_spec<"true|false", 0, bool>;
|
||||
constexpr auto identifier = bp::token_spec<"[a-zA-Z]\\w*", 1>;
|
||||
constexpr auto identifier = bp::token_spec<"\\p{L}+", 1>;
|
||||
|
||||
int main()
|
||||
{
|
||||
// token_parser
|
||||
{
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char, int> | true_false |
|
||||
identifier |
|
||||
bp::token_chars<'=', ';', '#', '$', '%'>;
|
||||
{
|
||||
constexpr auto parser = true_false;
|
||||
BOOST_TEST(bp::parse("true" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(bp::parse("false" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
{
|
||||
constexpr auto parser = true_false(false);
|
||||
BOOST_TEST(!bp::parse("true" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(bp::parse("false" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
{
|
||||
bool b = true;
|
||||
auto get_bool = [](auto & ctx) { return _globals(ctx); };
|
||||
auto parser = bp::with_globals(true_false(get_bool), b);
|
||||
BOOST_TEST(bp::parse("true" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("false" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
}
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char, int> | true_false |
|
||||
identifier |
|
||||
bp::token_chars<'=', ';', '#', '$', '%'>;
|
||||
{
|
||||
constexpr auto parser = identifier;
|
||||
BOOST_TEST(bp::parse("func" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(bp::parse("foo" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
{
|
||||
constexpr auto parser = identifier("func");
|
||||
BOOST_TEST(bp::parse("func" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("foo" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
{
|
||||
constexpr auto parser = identifier("f\xC3\xBCnc");
|
||||
constexpr auto parser_u8 = identifier(u8"fünc");
|
||||
constexpr auto parser_u16 = identifier(u"fünc");
|
||||
constexpr auto parser_u32 = identifier(U"fünc");
|
||||
|
||||
constexpr auto lexer_u8 =
|
||||
bp::lexer<char8_t, int> | true_false | identifier |
|
||||
bp::token_chars<'=', ';', '#', '$', '%'>;
|
||||
constexpr auto lexer_u16 =
|
||||
bp::lexer<char16_t, int> | true_false | identifier |
|
||||
bp::token_chars<'=', ';', '#', '$', '%'>;
|
||||
constexpr auto lexer_u32 =
|
||||
bp::lexer<char32_t, int> | true_false | identifier |
|
||||
bp::token_chars<'=', ';', '#', '$', '%'>;
|
||||
|
||||
// There appears to be a bug in CTRE, related to the use of
|
||||
// char8_t. It produces bad tokens, including appearances of
|
||||
// the replacement character. The exact same input here
|
||||
// results in good tokens for all cases except char8_t input.
|
||||
#if 0
|
||||
std::cout << "char tokens:\n";
|
||||
for (auto tok : "fünc" | bp::to_tokens(lexer)) {
|
||||
std::cout << "tok=" << tok << "\n";
|
||||
}
|
||||
std::cout << "\n\n";
|
||||
|
||||
std::cout << "char tokens:\n";
|
||||
for (auto tok : "f\xC3\xBCnc" | bp::to_tokens(lexer)) {
|
||||
std::cout << "tok=" << tok << "\n";
|
||||
}
|
||||
std::cout << "\n\n";
|
||||
|
||||
// BAD!
|
||||
std::cout << "char8_t tokens:\n";
|
||||
for (auto tok : u8"fünc" | bp::to_tokens(lexer_u8)) {
|
||||
std::cout << "tok=" << tok << "\n";
|
||||
}
|
||||
std::cout << "\n\n";
|
||||
|
||||
std::cout << "char16_t tokens:\n";
|
||||
for (auto tok : u"fünc" | bp::to_tokens(lexer_u16)) {
|
||||
std::cout << "tok=" << tok << "\n";
|
||||
}
|
||||
std::cout << "\n\n";
|
||||
|
||||
std::cout << "char32_t tokens:\n";
|
||||
for (auto tok : U"fünc" | bp::to_tokens(lexer_u32)) {
|
||||
std::cout << "tok=" << tok << "\n";
|
||||
}
|
||||
std::cout << "\n\n";
|
||||
#endif
|
||||
|
||||
// Range to match is a sequence of char; no transcoding will be
|
||||
// done.
|
||||
BOOST_TEST(
|
||||
bp::parse("f\xC3\xBCnc" | bp::to_tokens(lexer), parser));
|
||||
#if 0
|
||||
BOOST_TEST(
|
||||
bp::parse(u8"fünc" | bp::to_tokens(lexer_u8), parser));
|
||||
#endif
|
||||
BOOST_TEST(
|
||||
bp::parse(u"fünc" | bp::to_tokens(lexer_u16), parser));
|
||||
BOOST_TEST(
|
||||
bp::parse(U"fünc" | bp::to_tokens(lexer_u32), parser));
|
||||
|
||||
BOOST_TEST( // char input; no transcoding on this one.
|
||||
bp::parse("f\xC3\xBCnc" | bp::to_tokens(lexer), parser_u8));
|
||||
#if 0
|
||||
BOOST_TEST(
|
||||
bp::parse(u8"fünc" | bp::to_tokens(lexer_u8), parser_u8));
|
||||
#endif
|
||||
BOOST_TEST(
|
||||
bp::parse(u"fünc" | bp::to_tokens(lexer_u16), parser_u8));
|
||||
BOOST_TEST(
|
||||
bp::parse(U"fünc" | bp::to_tokens(lexer_u32), parser_u8));
|
||||
|
||||
BOOST_TEST( // char input; no transcoding on this one.
|
||||
!bp::parse(
|
||||
"f\xC3\xBCnc" | bp::to_tokens(lexer), parser_u16));
|
||||
#if 0
|
||||
BOOST_TEST(
|
||||
bp::parse(u8"fünc" | bp::to_tokens(lexer_u8), parser_u16));
|
||||
#endif
|
||||
BOOST_TEST(
|
||||
bp::parse(u"fünc" | bp::to_tokens(lexer_u16), parser_u16));
|
||||
BOOST_TEST(
|
||||
bp::parse(U"fünc" | bp::to_tokens(lexer_u32), parser_u16));
|
||||
|
||||
BOOST_TEST( // char input; no transcoding on this one.
|
||||
!bp::parse(
|
||||
"f\xC3\xBCnc" | bp::to_tokens(lexer), parser_u32));
|
||||
#if 0
|
||||
BOOST_TEST(
|
||||
bp::parse(u8"fünc" | bp::to_tokens(lexer_u8), parser_u32));
|
||||
#endif
|
||||
BOOST_TEST(
|
||||
bp::parse(u"fünc" | bp::to_tokens(lexer_u16), parser_u32));
|
||||
BOOST_TEST(
|
||||
bp::parse(U"fünc" | bp::to_tokens(lexer_u32), parser_u32));
|
||||
|
||||
BOOST_TEST(!bp::parse("func" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u8"func" | bp::to_tokens(lexer_u8), parser));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u"func" | bp::to_tokens(lexer_u16), parser));
|
||||
BOOST_TEST(
|
||||
!bp::parse(U"func" | bp::to_tokens(lexer_u32), parser));
|
||||
|
||||
BOOST_TEST(
|
||||
!bp::parse("func" | bp::to_tokens(lexer), parser_u8));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u8"func" | bp::to_tokens(lexer_u8), parser_u8));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u"func" | bp::to_tokens(lexer_u16), parser_u8));
|
||||
BOOST_TEST(
|
||||
!bp::parse(U"func" | bp::to_tokens(lexer_u32), parser_u8));
|
||||
|
||||
BOOST_TEST(
|
||||
!bp::parse("func" | bp::to_tokens(lexer), parser_u16));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u8"func" | bp::to_tokens(lexer_u8), parser_u16));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u"func" | bp::to_tokens(lexer_u16), parser_u16));
|
||||
BOOST_TEST(
|
||||
!bp::parse(U"func" | bp::to_tokens(lexer_u32), parser_u16));
|
||||
|
||||
BOOST_TEST(
|
||||
!bp::parse("func" | bp::to_tokens(lexer), parser_u32));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u8"func" | bp::to_tokens(lexer_u8), parser_u32));
|
||||
BOOST_TEST(
|
||||
!bp::parse(u"func" | bp::to_tokens(lexer_u16), parser_u32));
|
||||
BOOST_TEST(
|
||||
!bp::parse(U"func" | bp::to_tokens(lexer_u32), parser_u32));
|
||||
|
||||
BOOST_TEST(!bp::parse("foo" | bp::to_tokens(lexer), parser));
|
||||
BOOST_TEST(!bp::parse("$" | bp::to_tokens(lexer), parser));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// basic
|
||||
{
|
||||
constexpr auto lexer = bp::lexer<char, int> | true_false | identifier |
|
||||
|
||||
Reference in New Issue
Block a user