2
0
mirror of https://github.com/boostorg/parser.git synced 2026-02-03 09:22:14 +00:00

Add unicode symbols parser (#213)

* Add symb parser to handle unicode symbols

* Add documentation for symb

* Add tests for symb

* Fix typo in the documentation

---------

Contributed by: Antoine Fontaine <antoinefontaine@posteo.net>
This commit is contained in:
necessarily-equal
2025-02-21 06:51:17 +01:00
committed by GitHub
parent 0a34acc42a
commit b253d9ca53
9 changed files with 1203 additions and 1 deletions

View File

@@ -218,6 +218,7 @@
[def _control_ [globalref boost::parser::control `control`]]
[def _digit_ [globalref boost::parser::digit `digit`]]
[def _punct_ [globalref boost::parser::punct `punct`]]
[def _symb_ [globalref boost::parser::symb `symb`]]
[def _hex_digit_ [globalref boost::parser::hex_digit `hex_digit`]]
[def _lower_ [globalref boost::parser::lower `lower`]]
[def _upper_ [globalref boost::parser::upper `upper`]]

View File

@@ -132,6 +132,11 @@ the input they match unless otherwise stated in the table below.]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[]]
[[ `_symb_` ]
[ Matches a single symbol code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
[]]
[[ `_hex_digit_` ]
[ Matches a single hexadecimal digit code point. ]
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]

View File

@@ -245,6 +245,13 @@ namespace boost { namespace parser { namespace detail {
std::ostream & os,
int components = 0);
// Forward declaration of the print_parser() overload taking a
// char_set_parser<symb_chars> (the type wrapped by the `symb` parser).
// `components` defaults to 0 on this declaration.
template<typename Context>
void print_parser(
Context const & context,
char_set_parser<symb_chars> const & parser,
std::ostream & os,
int components = 0);
template<typename Context>
void print_parser(
Context const & context,

View File

@@ -636,6 +636,16 @@ namespace boost { namespace parser { namespace detail {
os << "punct";
}
// print_parser() overload for char_set_parser<symb_chars>: writes the
// literal name "symb" to `os`. The `context`, `parser` and `components`
// arguments are not read by this overload.
template<typename Context>
void print_parser(
Context const & context,
char_set_parser<symb_chars> const & parser,
std::ostream & os,
int components)
{
os << "symb";
}
template<typename Context>
void print_parser(
Context const & context,

File diff suppressed because it is too large Load Diff

View File

@@ -7811,12 +7811,18 @@ namespace boost { namespace parser {
control;
/** The punctuation character parser. Matches the full set of Unicode
punctuation clases (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
punctuation classes (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
and "Po"). */
inline BOOST_PARSER_ALGO_CONSTEXPR
parser_interface<char_set_parser<detail::punct_chars>>
punct;
/** The symbol character parser. Matches the full set of Unicode
symbol classes (specifically, "Sc" (currency), "Sk" (modifier), "Sm"
(math), and "So" (other)). */
inline BOOST_PARSER_ALGO_CONSTEXPR
parser_interface<char_set_parser<detail::symb_chars>>
symb;
/** The lower case character parser. Matches the full set of Unicode
lower case code points (class "Ll"). */
inline BOOST_PARSER_ALGO_CONSTEXPR

View File

@@ -143,6 +143,8 @@ namespace boost { namespace parser {
struct punct_chars
{};
// Empty tag type naming the symbol character set; it is used as the
// template argument of char_set_parser<> to form the `symb` parser.
struct symb_chars
{};
struct lower_case_chars
{};
struct upper_case_chars

View File

@@ -245,6 +245,10 @@ void github_issue_209()
std::begin(bp::detail::char_set<detail::punct_chars>::chars),
std::end(bp::detail::char_set<detail::punct_chars>::chars)));
BOOST_TEST(std::is_sorted(
std::begin(bp::detail::char_set<detail::symb_chars>::chars),
std::end(bp::detail::char_set<detail::symb_chars>::chars)));
BOOST_TEST(std::is_sorted(
std::begin(bp::detail::char_set<detail::lower_case_chars>::chars),
std::end(bp::detail::char_set<detail::lower_case_chars>::chars)));

View File

@@ -2753,6 +2753,16 @@ int main()
BOOST_TEST(result == std::vector<uint32_t>({0x21, 0xfda}));
}
// symb_
{
// One or more symbol code points.
auto parser = +symb;
// Input mixes symbols with non-symbols: the expected result below omits
// '!', U+1D7C6, 'b' and U+1039F.
std::u32string str = U"$^\u20AC!\u2194\u220F\U0001D7C6b\u2280\U0001FACE\U0001039F";
std::vector<uint32_t> result;
// `char_ - symb` is passed in the skip-parser position, so every
// non-symbol code point between symbols is skipped rather than ending
// the match.
BOOST_TEST(parse(str, parser, char_ - symb, result));
BOOST_TEST(result == std::vector<uint32_t>({U'$', U'^', 0x20AC, 0x2194, 0x220F, 0x2280, 0x1FACE}));
}
// lower_
{
auto parser = +lower;