// // Copyright (c) 2022 alandefreitas (alandefreitas@gmail.com) // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt // #ifndef BOOST_URL_EXAMPLE_MAILTO_MAILTO_GRAMMAR_HPP #define BOOST_URL_EXAMPLE_MAILTO_MAILTO_GRAMMAR_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace urls = boost::urls; namespace grammar = boost::urls::grammar; /// The set of dtext_no_obs characters struct dtext_no_obs_chars_t { constexpr bool operator()(char c) const noexcept { // dtext-no-obs = %d33-90 / %d94-126 return (c >= '!' && c <= 'Z') || (c >= '^' && c <= '~'); } }; /// A character set containing dtext_no_obs characters. constexpr dtext_no_obs_chars_t dtext_no_obs_chars{}; /// A character set containing crlf characters. constexpr auto crlf_chars = grammar::lut_chars("\n\r"); /// A character set containing atext characters. constexpr auto atext_chars = grammar::lut_chars("!#$%&'*+-/=?^_`{|}~") + grammar::alnum_chars + grammar::digit_chars; /// A character set containing wsp characters. constexpr auto wsp_chars = grammar::lut_chars(" \t"); /// The set of obs_ctext characters struct obs_ctext_chars_t { static constexpr unsigned char c1 = char(1); static constexpr unsigned char c2 = char(8); static constexpr unsigned char c3 = char(11); static constexpr unsigned char c4 = char(12); static constexpr unsigned char c5 = char(14); static constexpr unsigned char c6 = char(31); static constexpr unsigned char c7 = char(127); constexpr obs_ctext_chars_t() noexcept = default; constexpr bool operator()(char c) const noexcept { // obs-ctext = obs-NO-WS-CTL // obs-NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127 return (static_cast(c) >= c1 && static_cast(c) <= c2) || (static_cast(c) >= c3 && static_cast(c) <= c4) || static_cast(c) == c5 || static_cast(c) == c6 || static_cast(c) == c7; } }; /// A character set containing obs_ctext characters. constexpr obs_ctext_chars_t obs_ctext_chars{}; /// A character set containing obs_qtext characters. constexpr auto obs_qtext_chars = obs_ctext_chars; /// The set of ctext characters struct ctext_chars_t { constexpr ctext_chars_t() noexcept = default; constexpr bool operator()(char c) const noexcept { // ctext = %d33-39 / %d42-91 / %d93-126 / obs-ctext return (c >= '!' && c <= '\'') || (c >= '*' && c <= '[') || (c >= ']' && c <= '~') || obs_ctext_chars(c); } }; /// A character set containing ctext characters. constexpr ctext_chars_t ctext_chars{}; /// The set of qtext characters struct qtext_chars_t { constexpr qtext_chars_t() noexcept = default; constexpr bool operator()(char c) const noexcept { // qtext = %d33 / %d35-91 / %d93-126 / obs-qtext return c == '!' || (c >= '#' && c <= '[') || (c >= ']' && c <= '~') || obs_qtext_chars(c); } }; /// A character set containing qtext characters. constexpr qtext_chars_t qtext_chars{}; /// A character set containing qchars constexpr auto qchars = urls::unreserved_chars + "!$\\()*+,;:@"; constexpr auto atext_token = grammar::token_rule(atext_chars); /// Rule for dot-atom-text = 1*atext *("." 1*atext) constexpr auto dot_atom_text_rule = grammar::range_rule( atext_token, grammar::tuple_rule( grammar::squelch( grammar::delim_rule('.')), atext_token)); /// Rule for "[" *dtext-no-obs "]" constexpr auto quoted_dtext_no_obs = grammar::tuple_rule( grammar::squelch( grammar::delim_rule('[')), grammar::optional_rule( grammar::token_rule(dtext_no_obs_chars)), grammar::squelch( grammar::delim_rule(']'))); /// Rule for domain = dot-atom-text / "[" *dtext-no-obs "]" constexpr auto domain_rule = grammar::variant_rule( dot_atom_text_rule, quoted_dtext_no_obs); /// Rule for obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) constexpr auto obs_qp_rule = grammar::tuple_rule( grammar::delim_rule('\\'), grammar::variant_rule( grammar::delim_rule('\0'), grammar::delim_rule(ctext_chars), grammar::delim_rule(crlf_chars))); /// Rule for quoted-pair = ("\" (VCHAR / WSP)) / obs-qp constexpr auto quoted_pair_rule = grammar::variant_rule( grammar::tuple_rule( grammar::delim_rule('\\'), grammar::variant_rule( grammar::delim_rule(grammar::vchars), grammar::delim_rule(' '))), obs_qp_rule); /// Rule for obs-FWS = 1*WSP *(CRLF 1*WSP) constexpr auto obs_fws_rule = grammar::tuple_rule( grammar::token_rule(wsp_chars), grammar::range_rule( grammar::delim_rule(crlf_chars), grammar::token_rule(wsp_chars))); /// Rule for FWS = ([*WSP CRLF] 1*WSP) / obs-FWS constexpr auto fws_rule = grammar::variant_rule( grammar::tuple_rule( grammar::optional_rule( grammar::tuple_rule( grammar::optional_rule( grammar::token_rule(wsp_chars)), grammar::delim_rule(crlf_chars))), grammar::token_rule(wsp_chars)), obs_fws_rule); namespace detail { // workaround for value-based recursive rules struct ccontent_and_comment_rules { struct ccontent_rule_t { using value_type = boost::core::string_view; boost::system::result< value_type > parse( char const*& it, char const* end ) const noexcept { auto it0 = it; bool v = ccontent_and_comment_rules:: parse_ccontent(it, end); if (v) return boost::core::string_view(it0, it); return grammar::error::invalid; } }; static bool parse_ccontent(char const*& it, char const* end) noexcept { // ccontent = ctext / quoted-pair / comment return grammar::parse( it, end, grammar::variant_rule( grammar::delim_rule(ctext_chars), quoted_pair_rule, ccontent_rule_t{})).has_value(); }; struct comment_rule_t { using value_type = boost::core::string_view; boost::system::result< value_type > parse( char const*& it, char const* end ) const noexcept { auto it0 = it; bool v = ccontent_and_comment_rules:: parse_comment(it, end); if (v) return boost::core::string_view(it0, it); return grammar::error::invalid; } }; static bool parse_comment(char const*& it, char const* end) noexcept { // comment = "(" *([FWS] ccontent) [FWS] ")" return grammar::parse( it, end, grammar::tuple_rule( grammar::delim_rule('('), grammar::range_rule( grammar::tuple_rule( grammar::optional_rule(fws_rule), ccontent_rule_t{})), grammar::optional_rule(fws_rule), grammar::delim_rule(')'))).has_value(); } }; } /// Rule for ccontent = ctext / quoted-pair / comment constexpr auto ccontent_rule = detail::ccontent_and_comment_rules::ccontent_rule_t{}; /// Rule for comment = "(" *([FWS] ccontent) [FWS] ")" constexpr auto comment_rule = detail::ccontent_and_comment_rules::comment_rule_t{}; /// Rule for CFWS = (1*([FWS] comment) [FWS]) / FWS constexpr auto cfws_rule = grammar::variant_rule( grammar::tuple_rule( grammar::range_rule( grammar::tuple_rule( grammar::optional_rule(fws_rule), comment_rule), 1), grammar::optional_rule(fws_rule)), fws_rule); /// Rule for qcontent = qtext / quoted-pair constexpr auto qcontent_rule = grammar::variant_rule( grammar::delim_rule(qtext_chars), quoted_pair_rule); /// Rule for quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS] constexpr auto quoted_string_rule = grammar::tuple_rule( grammar::optional_rule(cfws_rule), grammar::delim_rule('"'), grammar::range_rule( grammar::tuple_rule( grammar::optional_rule(fws_rule), qcontent_rule)), grammar::optional_rule(fws_rule), grammar::delim_rule('"'), grammar::optional_rule(cfws_rule)); /// Rule for local-part = dot-atom-text / quoted-string constexpr auto local_part_rule = grammar::variant_rule( dot_atom_text_rule, quoted_string_rule); /// Rule for addr-spec = local-part "@" domain constexpr auto addr_spec_rule = grammar::tuple_rule( local_part_rule, grammar::squelch( grammar::delim_rule('@')), domain_rule); /// Rule for to = addr-spec *("," addr-spec ) constexpr auto to_rule = grammar::range_rule( addr_spec_rule, grammar::tuple_rule( grammar::squelch( grammar::delim_rule(',')), addr_spec_rule)); /// Rule for hfvalue = *qchar constexpr auto hfvalue_rule = urls::pct_encoded_rule(qchars); /// Rule for hfname = *qchar struct hfname_rule_t { using value_type = boost::core::string_view; boost::system::result parse( char const*& it, char const* end ) const noexcept { boost::core::string_view s(it, end); it += s.size(); auto r = grammar::parse(s, hfvalue_rule); if (!r) return r.error(); // The user agent interpreting a 'mailto' URI SHOULD NOT create a // message if any of the header fields are considered dangerous static const boost::core::string_view valid_k[] = { "to", "subject", "keywords", "cc", "body", "in-reply-to" }; if (std::any_of( std::begin(valid_k), std::end(valid_k), [s](boost::core::string_view valid_k) { return grammar::ci_is_equal(s, valid_k); })) return s; return grammar::error::invalid; } }; constexpr auto hfname_rule = hfname_rule_t{}; #endif