2
0
mirror of https://github.com/boostorg/url.git synced 2026-01-21 17:32:21 +00:00
Files
url/test/unit/grammar.cpp
Vinnie Falco 9d2c1e5409 Refactor grammar:
The Rule concept is changed:

* rules are stateful values
* nested value_type holds the result of parsing
* member function `Rule::parse` is the algorithm
* parse returns `result<value_type>`

And:

* All rfc3986 rules are reimplemented
* New grammar non-terminal elements introduced:
  - char_rule
  - not_empty_rule
  - optional_rule
  - sequence_rule
  - variant_rule
2022-07-27 19:31:16 -07:00

766 lines
14 KiB
C++

//
// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/CPPAlliance/url
//
#if 1
// Test that header file is self-contained.
#include <boost/url/grammar.hpp>
#else
#include <boost/url/string_view.hpp>
#include <boost/url/error_code.hpp>
#include <new>
#include <tuple>
#include <utility>
#include "test_suite.hpp"
#include <boost/url/grammar/charset.hpp>
#include <boost/url/grammar/error.hpp>
#include <boost/url/grammar/parse.hpp>
#include <type_traits>
namespace boost {
namespace urls {
namespace grammar {
#if 0
namespace grammar_ {
namespace grammar = urls::grammar;
using grammar::error;
using grammar::is_rule;
using grammar::parse_tag;
#endif
//------------------------------------------------
//
// parse
//
//------------------------------------------------
// Empty placeholder result type for rules that
// match input without producing a value; it is
// the value_type of ows_comma_ows_rule_t below.
struct none
{
};
//------------------------------------------------
/** Adapter which rejects an empty match

    Wraps another rule and produces the same
    value_type. After the wrapped rule succeeds,
    the parse is turned into `error::syntax` if
    no input was consumed.

    NOTE(review): every other rule in this file
    exposes its customization point as a friend
    named `tag_invoke`; this one names it `parse`.
    Confirm that this is intentional.

    @tparam Rule The rule being wrapped
*/
template<class Rule>
struct not_empty_rule
{
    using value_type =
        typename Rule::value_type;

    // Stores a copy of the wrapped rule
    explicit
    not_empty_rule(
        Rule const& r)
        : r_(r)
    {
    }

    friend
    void
    parse(
        parse_tag const&,
        char const*& it,
        char const* end,
        error_code& ec,
        not_empty_rule const& r,
        value_type& t)
    {
        // remember where we started so that
        // a zero-length match can be detected
        char const* const start = it;
        t = grammar::parse(
            it, end, ec, r.r_);
        if(ec.failed())
            return; // inner error is reported as-is
        if(it == start)
        {
            // can't be empty
            ec = error::syntax;
        }
    }

private:
    Rule r_;
};
//------------------------------------------------
//
// token_rule
//
//------------------------------------------------
/** Rule matching a non-empty run of characters

    The result is a string_view covering the
    characters accepted by the stored character
    set, as located by `grammar::find_if_not`.

    @li `error::incomplete` when there is no input
    @li `error::syntax` when nothing was consumed

    @tparam CharSet A type satisfying is_charset
*/
template<class CharSet>
struct token_rule_t
{
    using value_type = string_view;

    BOOST_STATIC_ASSERT(
        grammar::is_charset<CharSet>::value);

    constexpr
    token_rule_t() = default;

    // Stores a copy of the character set
    explicit
    constexpr
    token_rule_t(
        CharSet const& cs) noexcept
        : cs_(cs)
    {
    }

    friend
    void
    tag_invoke(
        parse_tag const&,
        char const*& it,
        char const* end,
        error_code& ec,
        token_rule_t<CharSet> const& r,
        string_view& t) noexcept
    {
        if(it == end)
        {
            // nothing to look at
            ec = grammar::error::incomplete;
            return;
        }
        auto const last =
            (grammar::find_if_not)(it, end, r.cs_);
        if(last == it)
        {
            // no progress: `it` is left untouched
            ec = grammar::error::syntax;
            return;
        }
        t = string_view(it, last - it);
        it = last;
        ec = {};
    }

private:
    CharSet cs_;
};
/** Return a token rule for a character set

    Convenience factory which deduces CharSet
    and returns a token_rule_t holding a copy
    of `cs`.
*/
template<class CharSet>
constexpr
auto
token_rule(
    CharSet const& cs) noexcept ->
        token_rule_t<CharSet>
{
    return token_rule_t<CharSet>{ cs };
}
//------------------------------------------------
//
// ows_comma_ows_rule
//
//------------------------------------------------
// Predicate for optional whitespace: returns
// true only for a space or a horizontal tab.
struct ws_t
{
    constexpr
    bool
    operator()(char c) const noexcept
    {
        return
            c == '\t' ||
            c == ' ';
    }
};

// Shared instance of the whitespace predicate
constexpr ws_t ws{};
/*  Rule matching a comma surrounded by optional whitespace

    @par BNF
    @code
    ows-comma   = OWS "," OWS
    @endcode

    On success the input position is moved past
    the trailing whitespace. On failure the
    position is left unchanged and the error
    is error::syntax.
*/
struct ows_comma_ows_rule_t
{
    using value_type = none;

    constexpr
    ows_comma_ows_rule_t() = default;

    friend
    void
    tag_invoke(
        parse_tag const&,
        char const*& it0,
        char const* end,
        error_code& ec,
        ows_comma_ows_rule_t const&,
        none&) noexcept
    {
        // work on a copy so that `it0` is only
        // committed once the whole rule matched
        auto it = it0;

        // OWS
        it = grammar::find_if_not(
            it, end, ws);

        // ","
        if( it == end ||
            *it != ',')
        {
            ec = error::syntax;
            return;
        }
        ++it;

        // OWS
        it0 = grammar::find_if_not(
            it, end, ws);
        ec = {};
    }
};

constexpr ows_comma_ows_rule_t ows_comma_ows_rule{};
//------------------------------------------------
//
// range
//
//------------------------------------------------
namespace detail {

/** A minimal optional-like holder for a trivial type

    Stores either nothing or a copy of a T. Because
    T is required to be trivial, no destructor is
    needed; leaving it implicit keeps the class
    trivially destructible (a literal type), so the
    constexpr constructors are actually usable in
    constant expressions.

    Dereferencing does not check for a value; use
    has_value() first when the state is unknown.

    @tparam T A trivial type
*/
template<class T>
class optional
{
    static_assert(
        std::is_trivial<T>::value,
        "T must be a trivial type");

    union U
    {
        // placeholder which is the active
        // member while no T is stored; this
        // gives the default constructor a
        // member to initialize, as constexpr
        // constructors of unions require
        // before C++20
        char c;
        T t;

        constexpr U() noexcept
            : c()
        {
        }

        constexpr U(
            T const& t_) noexcept
            : t(t_)
        {
        }
    };

    U u_;
    bool b_ = false;

public:
    /// Construct with no value
    constexpr optional() = default;

    /// Construct holding a copy of `t`
    constexpr
    optional(
        T const& t) noexcept
        : u_(t)
        , b_(true)
    {
    }

    constexpr
    optional(
        optional const& other) noexcept = default;

    /// Return true if a value is stored
    constexpr
    bool
    has_value() const noexcept
    {
        return b_;
    }

    /** Return the stored value

        @par Preconditions
        A value is stored (not checked).
    */
    constexpr
    T const&
    operator*() const noexcept
    {
        return u_.t;
    }
};

} // detail
/** A forward range of parsed elements
@tparam R The rule used to parse each element
*/
template<class R>
class range
{
static_assert(
is_rule<R>::value,
"Rule requirements not met");
public:
using value_type = typename R::value_type;
using reference = value_type;
using const_reference = value_type;
using pointer = void const*;
using size_type = std::size_t;
using difference_type =
std::ptrdiff_t;
using fn = void(*)(
char const*&,
char const*,
error_code&,
R const&,
value_type&);
class iterator;
using const_iterator = iterator;
~range() = default;
range() = default;
range(
range const&) noexcept = default;
range& operator=(
range const& v) noexcept = default;
iterator begin() const noexcept;
iterator end() const noexcept;
/** Return the parsed string
*/
string_view
string() const noexcept
{
return s_;
}
/** Return the number of elements in the range
*/
std::size_t
size() const noexcept
{
return n_;
}
/** Return true if the range is empty
*/
bool
empty() const noexcept
{
return n_ == 0;
}
private:
string_view s_;
std::size_t n_ = 0;
urls::detail::optional<R> r_;
fn begin_ = nullptr;
fn increment_ = nullptr;
range(
string_view s,
std::size_t n,
R const& r,
fn begin,
fn increment) noexcept
: s_(s)
, n_(n)
, r_(r)
, begin_(begin)
, increment_(increment)
{
}
template<class R_>
friend
range<R_>
parse_range(
char const*& it,
char const* end,
error_code& ec,
R_ const& r,
typename range<R_>::fn begin,
typename range<R_>::fn increment,
std::size_t N,
std::size_t M);
};
/** Forward iterator over the elements of a range

    The iterator keeps a pointer to its range, the
    current position inside the range's string and
    the most recently parsed value. Each step
    re-parses one element using the functions
    stored in the range. A null position marks
    the end iterator.

    The range must outlive its iterators, since
    only a pointer to it is stored.
*/
template<class R>
class range<R>::iterator
{
public:
    using value_type = typename R::value_type;
    using reference = value_type const&;
    using pointer = void const*;
    using difference_type = std::ptrdiff_t;
    using iterator_category =
        std::forward_iterator_tag;

    iterator() = default;

    iterator(
        iterator const&) noexcept = default;

    iterator& operator=(
        iterator const&) noexcept = default;

    /// Return the element parsed by the last step
    reference
    operator*() const noexcept
    {
        return t_;
    }

    bool
    operator==(
        iterator other) const noexcept
    {
        // can't compare iterators
        // from different containers!
        BOOST_ASSERT(r_ == other.r_);
        return p_ == other.p_;
    }

    bool
    operator!=(
        iterator other) const noexcept
    {
        return !(*this == other);
    }

    /** Advance to the next element

        @par Preconditions
        The iterator is not the end iterator.
    */
    iterator&
    operator++() noexcept
    {
        BOOST_ASSERT(p_ != nullptr);
        advance(r_->increment_);
        return *this;
    }

    iterator
    operator++(int) noexcept
    {
        auto tmp = *this;
        ++*this;
        return tmp;
    }

private:
    friend class range<R>;

    range<R> const* r_ = nullptr;
    char const* p_ = nullptr;
    typename R::value_type t_;

    // Parse one element with `f`, starting at
    // the current position. The iterator becomes
    // the end iterator when `f` reports
    // error::end; any other failure is a logic
    // error because the string was already
    // validated when the range was created.
    void
    advance(
        typename range<R>::fn f) noexcept
    {
        error_code ec;
        auto const end =
            r_->s_.data() +
            r_->s_.size();
        f(p_, end, ec, *r_->r_, t_);
        if(ec == error::end)
        {
            p_ = nullptr;
        }
        else
        {
            BOOST_ASSERT(! ec.failed());
        }
    }

    // Construct an iterator to the first element
    iterator(
        range<R> const& r) noexcept
        : r_(&r)
        , p_(r.s_.data())
    {
        if(r_->begin_ == nullptr)
        {
            // A default-constructed range has no
            // parse functions (and no stored rule).
            // Treat it as empty instead of calling
            // through a null function pointer.
            p_ = nullptr;
            return;
        }
        advance(r_->begin_);
    }

    // Construct the end iterator. The mem-initializers
    // are listed in declaration order, which is the
    // order in which they actually run.
    constexpr
    iterator(
        range<R> const& r,
        int) noexcept
        : r_(&r)
        , p_(nullptr)
    {
    }
};
// Returns an iterator positioned on the first
// element; constructing it parses that element.
template<class R>
auto
range<R>::
begin() const noexcept ->
    iterator
{
    return iterator(*this);
}
// Returns the end iterator; the extra int
// argument selects the iterator constructor
// which performs no parsing.
template<class R>
auto
range<R>::
end() const noexcept ->
    iterator
{
    return iterator(*this, 0);
}
/** Parse a sequence of elements into a range

    The first element is parsed with `begin` and
    each following one with `increment`. Either
    function signals the normal end of the
    sequence by setting `ec` to error::end.

    @param it On entry, where to start parsing.
    It is advanced by the parse functions.

    @param end One past the last input character.

    @param ec Cleared on success. Set to the
    error reported by `begin` or `increment` when
    it is something other than error::end, or to
    error::syntax when the number of elements is
    outside the range [N, M].

    @param r The rule handed to the parse functions
    and stored in the returned range.

    @param begin Parses the first element.

    @param increment Parses each later element.

    @param N The minimum number of elements.

    @param M The maximum number of elements.

    @return The range on success, otherwise a
    default-constructed range.
*/
template<class R>
range<R>
parse_range(
    char const*& it,
    char const* end,
    error_code& ec,
    R const& r,
    typename range<R>::fn begin,
    typename range<R>::fn increment,
    std::size_t N = 0,
    std::size_t M = std::size_t(-1))
{
    typename R::value_type t;
    std::size_t n = 0;
    auto const it0 = it;

    begin(it, end, ec, r, t);
    while(! ec.failed())
    {
        // An element was just parsed. Count it
        // before comparing against the maximum,
        // so that exactly M elements are allowed
        // and the (M+1)th is rejected.
        if(++n > M)
        {
            // too many
            ec = error::syntax;
            return {};
        }
        increment(it, end, ec, r, t);
    }

    // anything other than the end
    // marker is a genuine failure
    if(ec != error::end)
        return {};

    // The minimum applies no matter how many
    // elements were found, not only when the
    // sequence turned out to be empty.
    if(n < N)
    {
        // too few
        ec = error::syntax;
        return {};
    }

    // good
    ec = {};
    return {
        string_view(it0, it - it0),
        n, r, begin, increment};
}
//------------------------------------------------
/** Rule for a comma-delimited list of elements

    This rule matches a list holding at least
    N and at most M elements, where elements
    are separated by at least one comma and
    optional whitespace. The result is a
    range over the elements.

    @par BNF
    @code
    #element        => [ 1#element ]
    1#element       => element *( OWS "," OWS element )
    <n>#<m>element  => element <n-1>*<m-1>( OWS "," OWS element )
    @endcode

    Senders must emit compliant values, but
    receivers should accept values generated
    with the legacy production rules:

    @par Legacy BNF
    @code
    #element        => [ element ] *( OWS "," OWS [ element ] )
    @endcode

    As implemented here, any run of separators
    before or after an element is skipped, and
    the same function is used to parse the first
    element and every following one.

    @tparam R The rule to use for elements
    @tparam N The minimum number of elements, which may be zero
    @tparam M The maximum number of elements.

    @par Specification
    @li <a href="https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.1"
        >5.6.1. Lists (#rule ABNF Extension) (rfc9110)</a>
*/
template<
    class R,
    std::size_t N = 0,
    std::size_t M = std::size_t(-1)>
struct list_rule_t
{
    BOOST_STATIC_ASSERT(M >= N);

    using value_type = range<R>;

    // Stores a copy of the element rule
    constexpr
    list_rule_t(
        R const& r)
        : r_(r)
    {
    }

    friend
    void
    tag_invoke(
        parse_tag const&,
        char const*& it,
        char const* end,
        error_code& ec,
        list_rule_t const& r,
        value_type& t)
    {
        // one function serves both as the
        // "first element" and the "next
        // element" parser
        t = parse_range(
            it, end, ec, r.r_,
            &list_rule_t::increment,
            &list_rule_t::increment,
            N,
            M);
    }

private:
    // *( OWS "," OWS )
    //
    // Consumes separators until one fails to
    // match. On return `ec` holds that failure.
    static
    void
    skip_separators(
        char const*& it,
        char const* end,
        error_code& ec)
    {
        do
        {
            parse(
                it, end, ec,
                ows_comma_ows_rule);
        }
        while(! ec.failed());
    }

    // Parses one element together with the
    // separators around it. Reports error::end
    // when no element could be parsed.
    static
    void
    increment(
        char const*& it,
        char const* end,
        error_code& ec,
        R const& r,
        typename R::value_type& t)
    {
        skip_separators(it, end, ec);

        // element
        t = parse(
            it, end, ec, r);
        if(ec.failed())
        {
            ec = error::end;
            return;
        }

        skip_separators(it, end, ec);
        ec = {};
    }

    R r_;
};
/** Return a list rule for an element rule

    Convenience factory returning a
    list_rule_t<R, N, M> which holds a
    copy of `r`.
*/
template<
    class R,
    std::size_t N = 0,
    std::size_t M = std::size_t(-1)>
constexpr
auto
list_rule(R const& r) ->
    list_rule_t<R, N, M>
{
    return list_rule_t<R, N, M>(r);
}
//------------------------------------------------
// Rules exercised by grammar_test.
//
// transfer_coding is a token_rule built from
// grammar::alnum_chars.
constexpr auto transfer_coding =
token_rule(grammar::alnum_chars);
// transfer_encoding is a list_rule whose elements
// are parsed with transfer_coding, using the
// default bounds N and M of list_rule.
constexpr auto transfer_encoding =
list_rule(transfer_coding);
//------------------------------------------------
struct grammar_test
{
void
testList()
{
auto const bad = [](
string_view s)
{
error_code ec;
parse(
s, ec, transfer_encoding);
BOOST_TEST(ec.failed());
};
auto const ok = [](
string_view s,
std::initializer_list<
string_view> init)
{
error_code ec;
auto v = parse(
s, ec, transfer_encoding);
if(! BOOST_TEST(! ec.failed()))
return;
if(! BOOST_TEST_EQ(
v.size(), init.size()))
return;
auto it0 = init.begin();
auto it1 = v.begin();
while(it0 != init.end())
{
BOOST_TEST_EQ(*it0, *it1);
++it0;
++it1;
}
};
ok( "", {} );
bad( " " );
bad( "\t" );
bad( " \t" );
ok( ",", {} );
ok( " ,", {} );
ok( "\t,", {} );
ok( " \t,", {} );
ok( ", ", {} );
ok( ",\t", {} );
ok( ", \t", {} );
ok( "\t , \t", {} );
ok( "x", { "x" } );
ok( "x,y", { "x", "y" } );
ok( "x,y,z", { "x", "y", "z" } );
ok( "x,,", { "x" } );
ok( ",,x", { "x" } );
ok( " ,\t , x ,", { "x" } );
ok( "x , , , y, , ,z, ", { "x", "y", "z" } );
}
void
run()
{
testList();
}
};
// Hands grammar_test to the TEST_SUITE macro under
// the name "boost.url.grammar"; presumably this
// registers the fixture with the runner declared
// in test_suite.hpp - confirm against that header.
TEST_SUITE(
grammar_test,
"boost.url.grammar");
} // grammar
} // urls
} // boost
#endif