mirror of
https://github.com/boostorg/parser.git
synced 2026-02-03 09:22:14 +00:00
Add unicode symbols parser (#213)
* Add symb parser to handle unicode symbols
* Add documentation for symb
* Add tests for symb
* Fix typo in the documentation
---------
Contributed by: Antoine Fontaine <antoinefontaine@posteo.net>
This commit is contained in:
committed by
GitHub
parent
0a34acc42a
commit
b253d9ca53
@@ -218,6 +218,7 @@
|
||||
[def _control_ [globalref boost::parser::control `control`]]
|
||||
[def _digit_ [globalref boost::parser::digit `digit`]]
|
||||
[def _punct_ [globalref boost::parser::punct `punct`]]
|
||||
[def _symb_ [globalref boost::parser::symb `symb`]]
|
||||
[def _hex_digit_ [globalref boost::parser::hex_digit `hex_digit`]]
|
||||
[def _lower_ [globalref boost::parser::lower `lower`]]
|
||||
[def _upper_ [globalref boost::parser::upper `upper`]]
|
||||
|
||||
@@ -132,6 +132,11 @@ the input they match unless otherwise stated in the table below.]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[]]
|
||||
|
||||
[[ `_symb_` ]
|
||||
[ Matches a single symbol code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
[]]
|
||||
|
||||
[[ `_hex_digit_` ]
|
||||
[ Matches a single hexadecimal digit code point. ]
|
||||
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
|
||||
|
||||
@@ -245,6 +245,13 @@ namespace boost { namespace parser { namespace detail {
|
||||
std::ostream & os,
|
||||
int components = 0);
|
||||
|
||||
template<typename Context>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
char_set_parser<symb_chars> const & parser,
|
||||
std::ostream & os,
|
||||
int components = 0);
|
||||
|
||||
template<typename Context>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
|
||||
@@ -636,6 +636,16 @@ namespace boost { namespace parser { namespace detail {
|
||||
os << "punct";
|
||||
}
|
||||
|
||||
template<typename Context>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
char_set_parser<symb_chars> const & parser,
|
||||
std::ostream & os,
|
||||
int components)
|
||||
{
|
||||
os << "symb";
|
||||
}
|
||||
|
||||
template<typename Context>
|
||||
void print_parser(
|
||||
Context const & context,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7811,12 +7811,18 @@ namespace boost { namespace parser {
|
||||
control;
|
||||
|
||||
/** The punctuation character parser. Matches the full set of Unicode
|
||||
punctuation clases (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
|
||||
punctuation classes (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
|
||||
and "Po"). */
|
||||
inline BOOST_PARSER_ALGO_CONSTEXPR
|
||||
parser_interface<char_set_parser<detail::punct_chars>>
|
||||
punct;
|
||||
|
||||
/** The symbol character parser. Matches the full set of Unicode
|
||||
symbol classes (specifically, "Sc", "Sk", "Sm", and "So"). */
|
||||
inline BOOST_PARSER_ALGO_CONSTEXPR
|
||||
parser_interface<char_set_parser<detail::symb_chars>>
|
||||
symb;
|
||||
|
||||
/** The lower case character parser. Matches the full set of Unicode
|
||||
lower case code points (class "Ll"). */
|
||||
inline BOOST_PARSER_ALGO_CONSTEXPR
|
||||
|
||||
@@ -143,6 +143,8 @@ namespace boost { namespace parser {
|
||||
|
||||
struct punct_chars
|
||||
{};
|
||||
struct symb_chars
|
||||
{};
|
||||
struct lower_case_chars
|
||||
{};
|
||||
struct upper_case_chars
|
||||
|
||||
@@ -245,6 +245,10 @@ void github_issue_209()
|
||||
std::begin(bp::detail::char_set<detail::punct_chars>::chars),
|
||||
std::end(bp::detail::char_set<detail::punct_chars>::chars)));
|
||||
|
||||
BOOST_TEST(std::is_sorted(
|
||||
std::begin(bp::detail::char_set<detail::symb_chars>::chars),
|
||||
std::end(bp::detail::char_set<detail::symb_chars>::chars)));
|
||||
|
||||
BOOST_TEST(std::is_sorted(
|
||||
std::begin(bp::detail::char_set<detail::lower_case_chars>::chars),
|
||||
std::end(bp::detail::char_set<detail::lower_case_chars>::chars)));
|
||||
|
||||
@@ -2753,6 +2753,16 @@ int main()
|
||||
BOOST_TEST(result == std::vector<uint32_t>({0x21, 0xfda}));
|
||||
}
|
||||
|
||||
// symb_
|
||||
{
|
||||
auto parser = +symb;
|
||||
|
||||
std::u32string str = U"$^\u20AC!\u2194\u220F\U0001D7C6b\u2280\U0001FACE\U0001039F";
|
||||
std::vector<uint32_t> result;
|
||||
BOOST_TEST(parse(str, parser, char_ - symb, result));
|
||||
BOOST_TEST(result == std::vector<uint32_t>({U'$', U'^', 0x20AC, 0x2194, 0x220F, 0x2280, 0x1FACE}));
|
||||
}
|
||||
|
||||
// lower_
|
||||
{
|
||||
auto parser = +lower;
|
||||
|
||||
Reference in New Issue
Block a user