mirror of
https://github.com/boostorg/url.git
synced 2026-01-19 04:42:15 +00:00
fix: consistent behavior for space-as-plus option
This commit refactors all functions so they have consistent behavior for the space-as-plus encoding option:
- any_params_iter objects store and apply the appropriate option when measuring and copying
- when the option is enabled, encoding functions encode space-as-plus and plus as %2B regardless of the charset
- normalization and comparison algorithms take into consideration special query chars whose meaning changes depending on encoding
- all params_view objects created with default options enable space-as-plus encoding

fix #903
This commit is contained in:
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@@ -57,8 +57,8 @@ jobs:
|
||||
id: cpp-matrix
|
||||
with:
|
||||
compilers: |
|
||||
gcc >=4.8
|
||||
clang >=3.8
|
||||
gcc >=4.8 <15
|
||||
clang >=3.8 <20
|
||||
msvc >=14.20
|
||||
apple-clang *
|
||||
mingw *
|
||||
|
||||
@@ -63,8 +63,7 @@ public:
|
||||
void
|
||||
rewind() noexcept = 0;
|
||||
|
||||
// Measure and increment current element
|
||||
// element.
|
||||
// Measure and increment current element.
|
||||
// Returns false on end of range.
|
||||
// n is increased by encoded size.
|
||||
// Can throw on bad percent-escape
|
||||
@@ -84,19 +83,19 @@ public:
|
||||
|
||||
//------------------------------------------------
|
||||
//
|
||||
// query_iter
|
||||
// query_string_iter
|
||||
//
|
||||
//------------------------------------------------
|
||||
|
||||
// A string of plain query params
|
||||
struct BOOST_SYMBOL_VISIBLE
|
||||
query_iter
|
||||
query_string_iter
|
||||
: any_params_iter
|
||||
{
|
||||
// ne = never empty
|
||||
BOOST_URL_DECL
|
||||
explicit
|
||||
query_iter(
|
||||
query_string_iter(
|
||||
core::string_view s,
|
||||
bool ne = false) noexcept;
|
||||
|
||||
@@ -121,16 +120,18 @@ private:
|
||||
// A 1-param range allowing
|
||||
// self-intersection
|
||||
struct BOOST_SYMBOL_VISIBLE
|
||||
param_iter
|
||||
single_param_iter
|
||||
: any_params_iter
|
||||
{
|
||||
explicit
|
||||
param_iter(
|
||||
param_view const&) noexcept;
|
||||
single_param_iter(
|
||||
param_view const&,
|
||||
bool space_as_plus) noexcept;
|
||||
|
||||
private:
|
||||
bool has_value_;
|
||||
bool at_end_ = false;
|
||||
bool space_as_plus_ = false;
|
||||
|
||||
void rewind() noexcept override;
|
||||
bool measure(std::size_t&) noexcept override;
|
||||
@@ -145,10 +146,15 @@ private:
|
||||
|
||||
struct params_iter_base
|
||||
{
|
||||
bool space_as_plus_ = true;
|
||||
protected:
|
||||
explicit params_iter_base(
|
||||
bool space_as_plus) noexcept
|
||||
: space_as_plus_(space_as_plus)
|
||||
{}
|
||||
|
||||
// return encoded size
|
||||
BOOST_URL_DECL
|
||||
static
|
||||
void
|
||||
measure_impl(
|
||||
std::size_t& n,
|
||||
@@ -156,7 +162,6 @@ protected:
|
||||
|
||||
// encode to dest
|
||||
BOOST_URL_DECL
|
||||
static
|
||||
void
|
||||
copy_impl(
|
||||
char*& dest,
|
||||
@@ -180,9 +185,11 @@ struct params_iter
|
||||
|
||||
params_iter(
|
||||
FwdIt first,
|
||||
FwdIt last) noexcept
|
||||
FwdIt last,
|
||||
bool space_as_plus) noexcept
|
||||
: any_params_iter(
|
||||
first == last)
|
||||
, params_iter_base(space_as_plus)
|
||||
, it0_(first)
|
||||
, it_(first)
|
||||
, end_(last)
|
||||
@@ -404,10 +411,10 @@ private:
|
||||
template<class FwdIt>
|
||||
params_iter<FwdIt>
|
||||
make_params_iter(
|
||||
FwdIt first, FwdIt last)
|
||||
FwdIt first, FwdIt last, bool space_as_plus)
|
||||
{
|
||||
return params_iter<
|
||||
FwdIt>(first, last);
|
||||
FwdIt>(first, last, space_as_plus);
|
||||
}
|
||||
|
||||
template<class FwdIt>
|
||||
|
||||
@@ -31,6 +31,9 @@ constexpr char const* const empty_c_str_ = "";
|
||||
// This is the private 'guts' of a
|
||||
// url_view, exposed so different parts
|
||||
// of the implementation can work on it.
|
||||
// It stores the offsets and properties of
|
||||
// a URL string stored elsewhere and pointed
|
||||
// to by cs_.
|
||||
struct BOOST_URL_DECL url_impl : parts_base
|
||||
{
|
||||
static
|
||||
@@ -139,8 +142,9 @@ public:
|
||||
|
||||
//------------------------------------------------
|
||||
|
||||
// this allows a params to come from a
|
||||
// url_impl or a separate core::string_view
|
||||
// This class represents a query string, which
|
||||
// can originate from either a url_impl object
|
||||
// or an independent core::string_view.
|
||||
class BOOST_URL_DECL query_ref
|
||||
: private parts_base
|
||||
{
|
||||
|
||||
@@ -44,7 +44,7 @@ namespace urls {
|
||||
|
||||
@param s The string to measure.
|
||||
|
||||
@param unreserved The set of characters
|
||||
@param allowed The set of characters
|
||||
that is not percent-encoded.
|
||||
|
||||
@param opt The options for encoding. If
|
||||
@@ -64,7 +64,7 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
|
||||
std::size_t
|
||||
encoded_size(
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt = {}) noexcept;
|
||||
|
||||
//------------------------------------------------
|
||||
@@ -100,7 +100,7 @@ encoded_size(
|
||||
|
||||
@param s The string to encode.
|
||||
|
||||
@param unreserved The set of characters
|
||||
@param allowed The set of characters
|
||||
that is not percent-encoded.
|
||||
|
||||
@param opt The options for encoding. If
|
||||
@@ -122,7 +122,7 @@ encode(
|
||||
char* dest,
|
||||
std::size_t size,
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt = {});
|
||||
|
||||
#ifndef BOOST_URL_DOCS
|
||||
@@ -133,7 +133,7 @@ encode_unsafe(
|
||||
char* dest,
|
||||
std::size_t size,
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt);
|
||||
#endif
|
||||
|
||||
@@ -162,7 +162,7 @@ encode_unsafe(
|
||||
|
||||
@param s The string to encode.
|
||||
|
||||
@param unreserved The set of characters
|
||||
@param allowed The set of characters
|
||||
that is not percent-encoded.
|
||||
|
||||
@param opt The options for encoding. If
|
||||
@@ -186,7 +186,7 @@ template<
|
||||
BOOST_URL_STRTOK_RETURN
|
||||
encode(
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt = {},
|
||||
StringToken&& token = {}) noexcept;
|
||||
|
||||
|
||||
@@ -32,18 +32,59 @@ struct encoding_opts
|
||||
{
|
||||
/** True if spaces encode to and from plus signs
|
||||
|
||||
This option controls whether or not
|
||||
Although not prescribed by RFC 3986,
|
||||
many applications decode plus signs
|
||||
in URL queries as spaces. In particular,
|
||||
the form-urlencoded Media Type in HTML
|
||||
for submitting forms uses this convention.
|
||||
|
||||
This option controls whether
|
||||
the PLUS character ("+") is used to
|
||||
represent the SP character (" ") when
|
||||
encoding or decoding.
|
||||
Although not prescribed by the RFC, plus
|
||||
signs are commonly treated as spaces upon
|
||||
decoding when used in the query of URLs
|
||||
using well known schemes such as HTTP.
|
||||
|
||||
When this option is `true`, both the
|
||||
encoded SP ("%20") and the PLUS
|
||||
character ("+") represent a space (" ")
|
||||
when decoding. To represent a plus sign,
|
||||
its encoded form ("%2B") is used.
|
||||
|
||||
The @ref encode and @ref encoded_size functions
|
||||
will encode spaces as plus signs when
|
||||
this option is `true`, regardless of the
|
||||
allowed character set. They will also
|
||||
encode plus signs as "%2B" when this
|
||||
option is `true`, regardless of the
|
||||
allowed character set.
|
||||
|
||||
Note that when a URL is normalized,
|
||||
all unreserved percent-encoded characters are
|
||||
replaced with their unreserved equivalents.
|
||||
However, normalizing the URL query maintains
|
||||
the decoded and encoded "&=+" as they are
|
||||
because they might have different meanings.
|
||||
|
||||
This behavior is not optional because
|
||||
normalization can only mitigate false
|
||||
negatives, but it should eliminate
|
||||
false positives.
|
||||
Making it optional would allow
|
||||
a false positive because there's
|
||||
at least one very relevant scheme (HTTP)
|
||||
where a decoded or encoded "&=+" has different
|
||||
meanings and represents different resources.
|
||||
|
||||
The same considerations apply to URL comparison
|
||||
algorithms in the library, as they treat URLs
|
||||
as if they were normalized.
|
||||
|
||||
@par Specification
|
||||
@li <a href="https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1">
|
||||
application/x-www-form-urlencoded (w3.org)</a>
|
||||
@li <a href="https://datatracker.ietf.org/doc/html/rfc1866#section-8.2.1">
|
||||
The form-urlencoded Media Type (RFC 1866)</a>
|
||||
@li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.2">
|
||||
Section 6.2.2.2. Percent-Encoding Normalization (RFC 3986)</a>
|
||||
*/
|
||||
bool space_as_plus = false;
|
||||
|
||||
|
||||
@@ -30,14 +30,15 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
|
||||
std::size_t
|
||||
encoded_size(
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt) noexcept
|
||||
{
|
||||
/* If you get a compile error here, it
|
||||
means that the value you passed does
|
||||
not meet the requirements stated in
|
||||
the documentation.
|
||||
*/
|
||||
/*
|
||||
If you get a compilation error here, it
|
||||
means that the value you passed does
|
||||
not meet the requirements stated in
|
||||
the documentation.
|
||||
*/
|
||||
BOOST_STATIC_ASSERT(
|
||||
grammar::is_charset<CS>::value);
|
||||
|
||||
@@ -45,29 +46,49 @@ encoded_size(
|
||||
auto it = s.data();
|
||||
auto const last = it + s.size();
|
||||
|
||||
if(! opt.space_as_plus ||
|
||||
unreserved(' '))
|
||||
if (!opt.space_as_plus)
|
||||
{
|
||||
while(it != last)
|
||||
while (it != last)
|
||||
{
|
||||
if(unreserved(*it))
|
||||
n += 1;
|
||||
char const c = *it;
|
||||
if (allowed(c))
|
||||
{
|
||||
++n;
|
||||
}
|
||||
else
|
||||
{
|
||||
n += 3;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(it != last)
|
||||
// '+' is always encoded (thus
|
||||
// spending 3 chars) even if
|
||||
// allowed because "%2B" and
|
||||
// "+" have different meanings
|
||||
// when space as plus is enabled
|
||||
using FNT = bool (*)(CS const& allowed, char);
|
||||
FNT takes_one_char =
|
||||
allowed('+') ?
|
||||
(allowed(' ') ?
|
||||
FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
|
||||
FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
|
||||
(allowed(' ') ?
|
||||
FNT([](CS const& allowed, char c){ return allowed(c); }) :
|
||||
FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
|
||||
while (it != last)
|
||||
{
|
||||
auto c = *it;
|
||||
if(unreserved(c))
|
||||
++n;
|
||||
else if(c == ' ')
|
||||
char const c = *it;
|
||||
if (takes_one_char(allowed, c))
|
||||
{
|
||||
++n;
|
||||
}
|
||||
else
|
||||
{
|
||||
n += 3;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
@@ -82,10 +103,10 @@ encode(
|
||||
char* dest,
|
||||
std::size_t size,
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt)
|
||||
{
|
||||
/* If you get a compile error here, it
|
||||
/* If you get a compilation error here, it
|
||||
means that the value you passed does
|
||||
not meet the requirements stated in
|
||||
the documentation.
|
||||
@@ -94,7 +115,7 @@ encode(
|
||||
grammar::is_charset<CS>::value);
|
||||
|
||||
// '%' must be reserved
|
||||
BOOST_ASSERT(! unreserved('%'));
|
||||
BOOST_ASSERT(!allowed('%'));
|
||||
|
||||
char const* const hex =
|
||||
detail::hexdigs[opt.lower_case];
|
||||
@@ -113,42 +134,32 @@ encode(
|
||||
auto const dest0 = dest;
|
||||
auto const end3 = end - 3;
|
||||
|
||||
if(! opt.space_as_plus)
|
||||
if (!opt.space_as_plus)
|
||||
{
|
||||
while(it != last)
|
||||
{
|
||||
if(unreserved(*it))
|
||||
char const c = *it;
|
||||
if (allowed(c))
|
||||
{
|
||||
if(dest == end)
|
||||
return dest - dest0;
|
||||
*dest++ = *it++;
|
||||
*dest++ = c;
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
if(dest > end3)
|
||||
if (dest > end3)
|
||||
return dest - dest0;
|
||||
encode(dest, *it++);
|
||||
encode(dest, c);
|
||||
++it;
|
||||
}
|
||||
return dest - dest0;
|
||||
}
|
||||
else if(! unreserved(' '))
|
||||
else
|
||||
{
|
||||
// VFALCO space is usually reserved,
|
||||
// and we depend on this for an
|
||||
// optimization. if this assert
|
||||
// goes off we can split the loop
|
||||
// below into two versions.
|
||||
BOOST_ASSERT(! unreserved(' '));
|
||||
|
||||
while(it != last)
|
||||
while (it != last)
|
||||
{
|
||||
if(unreserved(*it))
|
||||
{
|
||||
if(dest == end)
|
||||
return dest - dest0;
|
||||
*dest++ = *it++;
|
||||
continue;
|
||||
}
|
||||
if(*it == ' ')
|
||||
char const c = *it;
|
||||
if (c == ' ')
|
||||
{
|
||||
if(dest == end)
|
||||
return dest - dest0;
|
||||
@@ -156,9 +167,20 @@ encode(
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
else if (
|
||||
allowed(c) &&
|
||||
c != '+')
|
||||
{
|
||||
if(dest == end)
|
||||
return dest - dest0;
|
||||
*dest++ = c;
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
if(dest > end3)
|
||||
return dest - dest0;
|
||||
encode(dest, *it++);
|
||||
encode(dest, c);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return dest - dest0;
|
||||
@@ -175,14 +197,14 @@ encode_unsafe(
|
||||
char* dest,
|
||||
std::size_t size,
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt)
|
||||
{
|
||||
BOOST_STATIC_ASSERT(
|
||||
grammar::is_charset<CS>::value);
|
||||
|
||||
// '%' must be reserved
|
||||
BOOST_ASSERT(! unreserved('%'));
|
||||
BOOST_ASSERT(!allowed('%'));
|
||||
|
||||
auto it = s.data();
|
||||
auto const last = it + s.size();
|
||||
@@ -204,42 +226,44 @@ encode_unsafe(
|
||||
};
|
||||
|
||||
auto const dest0 = dest;
|
||||
if(! opt.space_as_plus)
|
||||
if (!opt.space_as_plus)
|
||||
{
|
||||
while(it != last)
|
||||
{
|
||||
BOOST_ASSERT(dest != end);
|
||||
if(unreserved(*it))
|
||||
*dest++ = *it++;
|
||||
char const c = *it;
|
||||
if(allowed(c))
|
||||
{
|
||||
*dest++ = c;
|
||||
}
|
||||
else
|
||||
encode(dest, *it++);
|
||||
{
|
||||
encode(dest, c);
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// VFALCO space is usually reserved,
|
||||
// and we depend on this for an
|
||||
// optimization. if this assert
|
||||
// goes off we can split the loop
|
||||
// below into two versions.
|
||||
BOOST_ASSERT(! unreserved(' '));
|
||||
|
||||
while(it != last)
|
||||
{
|
||||
BOOST_ASSERT(dest != end);
|
||||
if(unreserved(*it))
|
||||
{
|
||||
*dest++ = *it++;
|
||||
}
|
||||
else if(*it == ' ')
|
||||
char const c = *it;
|
||||
if (c == ' ')
|
||||
{
|
||||
*dest++ = '+';
|
||||
++it;
|
||||
}
|
||||
else if (
|
||||
allowed(c) &&
|
||||
c != '+')
|
||||
{
|
||||
*dest++ = c;
|
||||
}
|
||||
else
|
||||
{
|
||||
encode(dest, *it++);
|
||||
encode(dest, c);
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return dest - dest0;
|
||||
@@ -253,7 +277,7 @@ template<
|
||||
BOOST_URL_STRTOK_RETURN
|
||||
encode(
|
||||
core::string_view s,
|
||||
CS const& unreserved,
|
||||
CS const& allowed,
|
||||
encoding_opts opt,
|
||||
StringToken&& token) noexcept
|
||||
{
|
||||
@@ -261,11 +285,11 @@ encode(
|
||||
grammar::is_charset<CS>::value);
|
||||
|
||||
auto const n = encoded_size(
|
||||
s, unreserved, opt);
|
||||
s, allowed, opt);
|
||||
auto p = token.prepare(n);
|
||||
if(n > 0)
|
||||
encode_unsafe(
|
||||
p, n, s, unreserved, opt);
|
||||
p, n, s, allowed, opt);
|
||||
return token.result();
|
||||
}
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ replace(
|
||||
u_->edit_params(
|
||||
from.it_, to.it_,
|
||||
detail::make_params_iter(
|
||||
first, last)),
|
||||
first, last, opt_.space_as_plus)),
|
||||
opt_);
|
||||
}
|
||||
|
||||
@@ -212,7 +212,7 @@ assign(FwdIt first, FwdIt last,
|
||||
begin().it_,
|
||||
end().it_,
|
||||
detail::make_params_iter(
|
||||
first, last));
|
||||
first, last, opt_.space_as_plus));
|
||||
}
|
||||
|
||||
template<class FwdIt>
|
||||
@@ -230,7 +230,7 @@ insert(
|
||||
before.it_,
|
||||
before.it_,
|
||||
detail::make_params_iter(
|
||||
first, last)),
|
||||
first, last, opt_.space_as_plus)),
|
||||
opt_);
|
||||
}
|
||||
|
||||
|
||||
@@ -387,7 +387,11 @@ private:
|
||||
|
||||
Depending on where the object was obtained,
|
||||
the strings may or may not contain percent
|
||||
escapes.
|
||||
escapes. Some functions and objects might
|
||||
expect encoded strings in this view, while
|
||||
others expect decoded strings. The caller
|
||||
should be aware of the context in which
|
||||
the object will be used.
|
||||
|
||||
For most usages, key comparisons are
|
||||
case-sensitive and duplicate keys in
|
||||
|
||||
@@ -29,11 +29,13 @@ namespace urls {
|
||||
# pragma warning(disable: 4251)
|
||||
#endif
|
||||
|
||||
/** Common functionality for containers
|
||||
/** Common functionality for query parameter containers
|
||||
|
||||
This base class is used by the library
|
||||
The library uses this base class
|
||||
to provide common member functions for
|
||||
containers. This cannot be instantiated
|
||||
containers of query parameters.
|
||||
|
||||
This class should not be instantiated
|
||||
directly; instead, use one of the
|
||||
containers or functions:
|
||||
|
||||
|
||||
@@ -2918,9 +2918,27 @@ private:
|
||||
detail::any_params_iter&&) ->
|
||||
detail::params_iter_impl;
|
||||
|
||||
// Decodes any unnecessary percent-escapes
|
||||
// and ensures hexadecimals are uppercase.
|
||||
// The encoding of ignored characters is
|
||||
// preserved.
|
||||
template
|
||||
<class AllowedCharSet,
|
||||
class IgnoredCharSet>
|
||||
void
|
||||
normalize_octets_impl(
|
||||
int,
|
||||
AllowedCharSet const& allowed,
|
||||
IgnoredCharSet const& ignored,
|
||||
op_t&) noexcept;
|
||||
|
||||
template<class CharSet>
|
||||
void normalize_octets_impl(int,
|
||||
CharSet const& allowed, op_t&) noexcept;
|
||||
void
|
||||
normalize_octets_impl(
|
||||
int,
|
||||
CharSet const& allowed,
|
||||
op_t&) noexcept;
|
||||
|
||||
void decoded_to_lower_impl(int id) noexcept;
|
||||
void to_lower_impl(int id) noexcept;
|
||||
};
|
||||
|
||||
@@ -44,8 +44,8 @@ any_params_iter::
|
||||
//
|
||||
//------------------------------------------------
|
||||
|
||||
query_iter::
|
||||
query_iter(
|
||||
query_string_iter::
|
||||
query_string_iter(
|
||||
core::string_view s,
|
||||
bool ne) noexcept
|
||||
: any_params_iter(
|
||||
@@ -55,7 +55,7 @@ query_iter(
|
||||
}
|
||||
|
||||
void
|
||||
query_iter::
|
||||
query_string_iter::
|
||||
rewind() noexcept
|
||||
{
|
||||
if(empty)
|
||||
@@ -81,7 +81,7 @@ rewind() noexcept
|
||||
}
|
||||
|
||||
bool
|
||||
query_iter::
|
||||
query_string_iter::
|
||||
measure(
|
||||
std::size_t& n) noexcept
|
||||
{
|
||||
@@ -101,7 +101,7 @@ measure(
|
||||
}
|
||||
|
||||
void
|
||||
query_iter::
|
||||
query_string_iter::
|
||||
copy(
|
||||
char*& dest,
|
||||
char const* end) noexcept
|
||||
@@ -122,7 +122,7 @@ copy(
|
||||
}
|
||||
|
||||
void
|
||||
query_iter::
|
||||
query_string_iter::
|
||||
increment() noexcept
|
||||
{
|
||||
p_ += n_;
|
||||
@@ -146,32 +146,34 @@ increment() noexcept
|
||||
//
|
||||
//------------------------------------------------
|
||||
|
||||
param_iter::
|
||||
param_iter(
|
||||
param_view const& p) noexcept
|
||||
single_param_iter::
|
||||
single_param_iter(
|
||||
param_view const& p,
|
||||
bool space_as_plus) noexcept
|
||||
: any_params_iter(
|
||||
false,
|
||||
p.key,
|
||||
p.value)
|
||||
, has_value_(p.has_value)
|
||||
, space_as_plus_(space_as_plus)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
param_iter::
|
||||
single_param_iter::
|
||||
rewind() noexcept
|
||||
{
|
||||
at_end_ = false;
|
||||
}
|
||||
|
||||
bool
|
||||
param_iter::
|
||||
single_param_iter::
|
||||
measure(std::size_t& n) noexcept
|
||||
{
|
||||
if(at_end_)
|
||||
return false;
|
||||
encoding_opts opt;
|
||||
opt.space_as_plus = false;
|
||||
opt.space_as_plus = space_as_plus_;
|
||||
n += encoded_size(
|
||||
s0,
|
||||
detail::param_key_chars,
|
||||
@@ -189,21 +191,21 @@ measure(std::size_t& n) noexcept
|
||||
}
|
||||
|
||||
void
|
||||
param_iter::
|
||||
single_param_iter::
|
||||
copy(
|
||||
char*& dest,
|
||||
char const* end) noexcept
|
||||
{
|
||||
BOOST_ASSERT(! at_end_);
|
||||
encoding_opts opt;
|
||||
opt.space_as_plus = false;
|
||||
opt.space_as_plus = space_as_plus_;
|
||||
dest += encode(
|
||||
dest,
|
||||
end - dest,
|
||||
s0,
|
||||
detail::param_key_chars,
|
||||
opt);
|
||||
if(has_value_)
|
||||
if (has_value_)
|
||||
{
|
||||
*dest++ = '=';
|
||||
dest += encode(
|
||||
@@ -228,7 +230,7 @@ measure_impl(
|
||||
param_view const& p) noexcept
|
||||
{
|
||||
encoding_opts opt;
|
||||
opt.space_as_plus = false;
|
||||
opt.space_as_plus = space_as_plus_;
|
||||
n += encoded_size(
|
||||
p.key,
|
||||
detail::param_key_chars,
|
||||
@@ -251,7 +253,7 @@ copy_impl(
|
||||
param_view const& p) noexcept
|
||||
{
|
||||
encoding_opts opt;
|
||||
opt.space_as_plus = false;
|
||||
opt.space_as_plus = space_as_plus_;
|
||||
dest += encode(
|
||||
dest,
|
||||
end - dest,
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "decode.hpp"
|
||||
#include <boost/url/segments_encoded_view.hpp>
|
||||
#include <boost/url/grammar/ci_string.hpp>
|
||||
#include <boost/url/grammar/lut_chars.hpp>
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/core/ignore_unused.hpp>
|
||||
#include <cstring>
|
||||
@@ -74,6 +75,60 @@ compare_encoded(
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
compare_encoded_query(
|
||||
core::string_view lhs,
|
||||
core::string_view rhs) noexcept
|
||||
{
|
||||
static constexpr
|
||||
grammar::lut_chars
|
||||
query_compare_exception_lut = "&=+";
|
||||
|
||||
std::size_t n0 = 0;
|
||||
std::size_t n1 = 0;
|
||||
char c0 = 0;
|
||||
char c1 = 0;
|
||||
while(
|
||||
!lhs.empty() &&
|
||||
!rhs.empty())
|
||||
{
|
||||
bool const lhs_was_decoded = lhs.front() != '%';
|
||||
bool const rhs_was_decoded = rhs.front() != '%';
|
||||
pop_encoded_front(lhs, c0, n0);
|
||||
pop_encoded_front(rhs, c1, n1);
|
||||
if (c0 < c1)
|
||||
return -1;
|
||||
if (c1 < c0)
|
||||
return 1;
|
||||
// The decoded chars are the same, but
|
||||
// are these query exceptions that have
|
||||
// different meanings when decoded?
|
||||
if (query_compare_exception_lut(c0))
|
||||
{
|
||||
// If so, we only continue if both
|
||||
// chars were decoded or encoded
|
||||
// the same way.
|
||||
if (lhs_was_decoded == rhs_was_decoded)
|
||||
continue;
|
||||
// Otherwise, we return a value != 0
|
||||
// because these chars are not equal.
|
||||
// If rhs was the decoded one, it contains
|
||||
// an ascii char higher than '%'
|
||||
if (rhs_was_decoded)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
n0 += detail::decode_bytes_unsafe(lhs);
|
||||
n1 += detail::decode_bytes_unsafe(rhs);
|
||||
if (n0 == n1)
|
||||
return 0;
|
||||
if (n0 < n1)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
digest_encoded(
|
||||
core::string_view s,
|
||||
|
||||
@@ -80,6 +80,16 @@ compare_encoded(
|
||||
core::string_view lhs,
|
||||
core::string_view rhs) noexcept;
|
||||
|
||||
// compare two core::string_views as if they are both
|
||||
// percent-decoded but do not consider the special
|
||||
// query chars ("&=+") equivalent unless they are
|
||||
// both decoded or encoded the same way, because
|
||||
// that gives them different meanings
|
||||
int
|
||||
compare_encoded_query(
|
||||
core::string_view lhs,
|
||||
core::string_view rhs) noexcept;
|
||||
|
||||
// digest a core::string_view as if it were
|
||||
// percent-decoded
|
||||
void
|
||||
|
||||
@@ -180,7 +180,7 @@ params_base::
|
||||
end() const noexcept ->
|
||||
iterator
|
||||
{
|
||||
return iterator(ref_, opt_, 0);
|
||||
return {ref_, opt_, 0};
|
||||
}
|
||||
|
||||
//------------------------------------------------
|
||||
|
||||
@@ -239,7 +239,7 @@ erase(
|
||||
return u_->edit_params(
|
||||
first.it_,
|
||||
last.it_,
|
||||
detail::query_iter(s));
|
||||
detail::query_string_iter(s));
|
||||
}
|
||||
|
||||
} // urls
|
||||
|
||||
@@ -36,7 +36,7 @@ params_encoded_view::
|
||||
operator
|
||||
params_view() const noexcept
|
||||
{
|
||||
return { ref_, encoding_opts{} };
|
||||
return { ref_, encoding_opts{ true, false, false} };
|
||||
}
|
||||
|
||||
} // urls
|
||||
|
||||
@@ -66,12 +66,12 @@ insert(
|
||||
param_view const& p) ->
|
||||
iterator
|
||||
{
|
||||
return iterator(
|
||||
return {
|
||||
u_->edit_params(
|
||||
before.it_,
|
||||
before.it_,
|
||||
detail::param_iter(p)),
|
||||
opt_);
|
||||
detail::single_param_iter(p, opt_.space_as_plus)),
|
||||
opt_};
|
||||
}
|
||||
|
||||
auto
|
||||
@@ -130,7 +130,7 @@ replace(
|
||||
u_->edit_params(
|
||||
pos.it_,
|
||||
std::next(pos).it_,
|
||||
detail::param_iter(p)),
|
||||
detail::single_param_iter(p, opt_.space_as_plus)),
|
||||
opt_);
|
||||
}
|
||||
|
||||
@@ -232,7 +232,7 @@ erase(
|
||||
u_->edit_params(
|
||||
first.it_,
|
||||
last.it_,
|
||||
detail::query_iter(s)),
|
||||
detail::query_string_iter(s)),
|
||||
opt_);
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,20 @@ namespace boost {
|
||||
namespace urls {
|
||||
namespace detail {
|
||||
|
||||
struct empty_chars_t
|
||||
{
|
||||
constexpr
|
||||
bool
|
||||
operator()(char) const noexcept
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
constexpr
|
||||
empty_chars_t
|
||||
empty_chars{};
|
||||
|
||||
constexpr
|
||||
auto
|
||||
user_chars =
|
||||
@@ -58,6 +72,11 @@ auto
|
||||
query_chars =
|
||||
pchars + '/' + '?' + '[' + ']';
|
||||
|
||||
constexpr
|
||||
grammar::lut_chars
|
||||
query_ignore_chars =
|
||||
"&=+";
|
||||
|
||||
constexpr
|
||||
auto
|
||||
param_key_chars = pchars
|
||||
|
||||
@@ -1328,7 +1328,7 @@ set_query(
|
||||
edit_params(
|
||||
detail::params_iter_impl(impl_),
|
||||
detail::params_iter_impl(impl_, 0),
|
||||
detail::query_iter(s, true));
|
||||
detail::query_string_iter(s, true));
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -1401,7 +1401,7 @@ params_ref
|
||||
url_base::
|
||||
params(encoding_opts opt) noexcept
|
||||
{
|
||||
return params_ref(*this, opt);
|
||||
return {*this, opt};
|
||||
}
|
||||
|
||||
params_encoded_ref
|
||||
@@ -1635,12 +1635,15 @@ resolve(
|
||||
//
|
||||
//------------------------------------------------
|
||||
|
||||
template <class Charset>
|
||||
template <
|
||||
class AllowedCharset,
|
||||
class IgnoredCharset>
|
||||
void
|
||||
url_base::
|
||||
normalize_octets_impl(
|
||||
int id,
|
||||
Charset const& allowed,
|
||||
AllowedCharset const& allowed,
|
||||
IgnoredCharset const& ignored,
|
||||
op_t& op) noexcept
|
||||
{
|
||||
char* it = s_ + impl_.offset(id);
|
||||
@@ -1660,7 +1663,8 @@ normalize_octets_impl(
|
||||
|
||||
// decode unreserved octets
|
||||
d = detail::decode_one(it + 1);
|
||||
if (allowed(d))
|
||||
if (allowed(d) &&
|
||||
!ignored(d))
|
||||
{
|
||||
*dest = d;
|
||||
it += 3;
|
||||
@@ -1683,6 +1687,18 @@ normalize_octets_impl(
|
||||
}
|
||||
}
|
||||
|
||||
template<class CharSet>
|
||||
void
|
||||
url_base::
|
||||
normalize_octets_impl(
|
||||
int idx,
|
||||
CharSet const& allowed,
|
||||
op_t& op) noexcept
|
||||
{
|
||||
return normalize_octets_impl(
|
||||
idx, allowed, detail::empty_chars, op);
|
||||
}
|
||||
|
||||
url_base&
|
||||
url_base::
|
||||
normalize_scheme()
|
||||
@@ -1884,7 +1900,10 @@ normalize_query()
|
||||
{
|
||||
op_t op(*this);
|
||||
normalize_octets_impl(
|
||||
id_query, detail::query_chars, op);
|
||||
id_query,
|
||||
detail::query_chars,
|
||||
detail::query_ignore_chars,
|
||||
op);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -2647,20 +2666,16 @@ edit_params(
|
||||
auto pos1 = pos0 + it1.pos;
|
||||
pos0 = pos0 + it0.pos;
|
||||
|
||||
// Iterator doesn't belong to this url
|
||||
// Iterators belong to this url
|
||||
BOOST_ASSERT(it0.ref.alias_of(impl_));
|
||||
|
||||
// Iterator doesn't belong to this url
|
||||
BOOST_ASSERT(it1.ref.alias_of(impl_));
|
||||
|
||||
// Iterator is in the wrong order
|
||||
// Iterators are in the right order
|
||||
BOOST_ASSERT(it0.index <= it1.index);
|
||||
|
||||
// Iterator is out of range
|
||||
// Iterators are within range
|
||||
BOOST_ASSERT(it0.index <= impl_.nparam_);
|
||||
BOOST_ASSERT(pos0 <= impl_.offset(id_frag));
|
||||
|
||||
// Iterator is out of range
|
||||
BOOST_ASSERT(it1.index <= impl_.nparam_);
|
||||
BOOST_ASSERT(pos1 <= impl_.offset(id_frag));
|
||||
|
||||
|
||||
@@ -701,7 +701,7 @@ compare(const url_view_base& other) const noexcept
|
||||
|
||||
if (has_query())
|
||||
{
|
||||
comp = detail::compare_encoded(
|
||||
comp = detail::compare_encoded_query(
|
||||
encoded_query(),
|
||||
other.encoded_query());
|
||||
if ( comp != 0 )
|
||||
|
||||
@@ -26,6 +26,20 @@
|
||||
namespace boost {
|
||||
namespace urls {
|
||||
|
||||
template <bool allow_plus, bool allow_space>
|
||||
struct space_as_plus_test_chars
|
||||
{
|
||||
constexpr
|
||||
bool
|
||||
operator()(char c) const noexcept
|
||||
{
|
||||
return
|
||||
(allow_plus && c == '+') ||
|
||||
(allow_space && c == ' ') ||
|
||||
unreserved_chars(c);
|
||||
}
|
||||
};
|
||||
|
||||
class encode_test
|
||||
{
|
||||
public:
|
||||
@@ -133,8 +147,35 @@ public:
|
||||
" ", test_chars{}, opt, {}) == "+");
|
||||
BOOST_TEST(encode(
|
||||
"A", test_chars{}, opt, {}) == "A");
|
||||
BOOST_TEST(encode(
|
||||
" A+", test_chars{}, opt, {}) == "+A+");
|
||||
BOOST_TEST_EQ(encode(
|
||||
" A+", test_chars{}, opt, {}), "+A%2B");
|
||||
}
|
||||
|
||||
// optimization of space-as-plus when the charset
|
||||
// already includes or excludes plus or space
|
||||
{
|
||||
encoding_opts opt;
|
||||
opt.space_as_plus = true;
|
||||
BOOST_TEST_EQ(
|
||||
encode(
|
||||
"a +",
|
||||
space_as_plus_test_chars<true, true>{},
|
||||
opt), "a+%2B");
|
||||
BOOST_TEST_EQ(
|
||||
encode(
|
||||
"a +",
|
||||
space_as_plus_test_chars<true, false>{},
|
||||
opt), "a+%2B");
|
||||
BOOST_TEST_EQ(
|
||||
encode(
|
||||
"a +",
|
||||
space_as_plus_test_chars<false, true>{},
|
||||
opt), "a+%2B");
|
||||
BOOST_TEST_EQ(
|
||||
encode(
|
||||
"a +",
|
||||
space_as_plus_test_chars<false, false>{},
|
||||
opt), "a+%2B");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -120,8 +120,9 @@ struct params_ref_test
|
||||
check(*r, init);
|
||||
}
|
||||
|
||||
// check that modification produces
|
||||
// the string and correct sequence
|
||||
// check whether modifying s0 via
|
||||
// f produces a URL with the query
|
||||
// string s1 and params init
|
||||
static
|
||||
void
|
||||
check(
|
||||
@@ -153,6 +154,9 @@ struct params_ref_test
|
||||
}
|
||||
}
|
||||
|
||||
// check whether modifying s0 via
|
||||
// f1 and f2 produces a URL with
|
||||
// the query string s1 and params init
|
||||
static
|
||||
void
|
||||
check(
|
||||
@@ -371,7 +375,7 @@ struct params_ref_test
|
||||
{
|
||||
assign(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
|
||||
};
|
||||
check(f, g, "", "first&last=&full=John%20Doe",
|
||||
check(f, g, "", "first&last=&full=John+Doe",
|
||||
{ {"first",no_value}, {"last",""}, {"full","John Doe"} });
|
||||
}
|
||||
{
|
||||
@@ -430,11 +434,11 @@ struct params_ref_test
|
||||
{
|
||||
append(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
|
||||
};
|
||||
check(f, g, "", "first&last=&full=John%20Doe",
|
||||
check(f, g, "", "first&last=&full=John+Doe",
|
||||
{ {"first",no_value}, {"last",""}, {"full","John Doe"} });
|
||||
check(f, g, "?", "&first&last=&full=John%20Doe",
|
||||
check(f, g, "?", "&first&last=&full=John+Doe",
|
||||
{ {"",no_value}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
|
||||
check(f, g, "?key=value", "key=value&first&last=&full=John%20Doe",
|
||||
check(f, g, "?key=value", "key=value&first&last=&full=John+Doe",
|
||||
{ {"key","value"}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
|
||||
}
|
||||
{
|
||||
@@ -892,6 +896,142 @@ struct params_ref_test
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
testSpaceAsPlus()
|
||||
{
|
||||
// issue #903
|
||||
{
|
||||
// "&=?" in key/values
|
||||
{
|
||||
// In the general case, normalized URLs
|
||||
// always decode unreserved chars and encode
|
||||
// reserved chars.
|
||||
// However, normalizing the URL query should
|
||||
// maintain the decoded and encoded "&=+"
|
||||
// because they have different meanings
|
||||
// in a query.
|
||||
// This isn't optional either because
|
||||
// normalization can only mitigate false
|
||||
// negatives, but it should eliminate
|
||||
// false positives.
|
||||
// Making it optional would be allowing
|
||||
// a false positive because there's
|
||||
// at least one very relevant scheme (HTTP)
|
||||
// where decoded/encoded "&=+" has different
|
||||
// meanings and represents different resources.
|
||||
urls::url u("https://a/a");
|
||||
params_ref params = u.params();
|
||||
params.append({"&=?", "&=?"});
|
||||
auto it = params.begin();
|
||||
const auto& param = *it;
|
||||
BOOST_TEST_EQ(param.key, "&=?");
|
||||
BOOST_TEST_EQ(param.value, "&=?");
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
|
||||
u.normalize_query();
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
|
||||
u.normalize();
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
|
||||
}
|
||||
|
||||
// opts.space_as_plus = true
|
||||
{
|
||||
// The params_ref object represents the decoded
|
||||
// query parameters, so appending "+" represents
|
||||
// a value that should be decoded as "+" (%2B)
|
||||
// and not an encoded "+" that would be decoded
|
||||
// as space.
|
||||
urls::url u("https://a/a");
|
||||
encoding_opts opts;
|
||||
opts.space_as_plus = true;
|
||||
params_ref params = u.params(opts);
|
||||
params.append({"a+b c", "d+e f"});
|
||||
auto it = params.begin();
|
||||
const auto& param = *it;
|
||||
BOOST_TEST_EQ(param.key, "a+b c");
|
||||
BOOST_TEST_EQ(param.value, "d+e f");
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
|
||||
u.normalize_query();
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
|
||||
}
|
||||
|
||||
// opts.space_as_plus = false
|
||||
{
|
||||
// The params_ref object represents the decoded
|
||||
// query parameters without any special treatment
|
||||
// for "+" and space. "+" can remain as is
|
||||
// and space is represented as "%20".
|
||||
urls::url u("https://a/a");
|
||||
encoding_opts opts;
|
||||
opts.space_as_plus = false;
|
||||
params_ref params = u.params(opts);
|
||||
params.append({"a+b c", "d+e f"});
|
||||
auto it = params.begin();
|
||||
const auto& param = *it;
|
||||
BOOST_TEST_EQ(param.key, "a+b c");
|
||||
BOOST_TEST_EQ(param.value, "d+e f");
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
|
||||
u.normalize_query();
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
|
||||
}
|
||||
|
||||
// comparisons
|
||||
{
|
||||
// We should not consider two URLs equivalent
|
||||
// if the query differs in the way "&=+" is encoded.
|
||||
// u1: no space as plus
|
||||
url u1("https://a/a?%26%3D?=%26=?&a+b%20c=d+e%20f");
|
||||
// u1e: no space as plus, non-separators encoded
|
||||
url u1e("https://a/a?%26%3D?=%26=?&%61+%62%20%63=%64+%65%20%66");
|
||||
// u2: space as plus
|
||||
url u2("https://a/a?%26%3D?=%26=?&a%2Bb+c=d%2Be+f");
|
||||
// u2e: space as plus, non-separators encoded
|
||||
url u2e("https://a/a?%26%3D?=%26=?&%61%2B%62+%63=%64%2Be+%66");
|
||||
// u3: separators decoded too early
|
||||
url u3("https://a/a?&=?=&=?&a%2Bb+c=d%2Be+f");
|
||||
BOOST_TEST_EQ(u1, u1e);
|
||||
BOOST_TEST_NE(u1, u2);
|
||||
BOOST_TEST_EQ(u2, u2e);
|
||||
BOOST_TEST_NE(u2, u3);
|
||||
BOOST_TEST_NE(u1, u3);
|
||||
|
||||
// queries that differ by size
|
||||
url u4("https://a/a?a+b%20c=d+e%20f");
|
||||
url u4longer("https://a/a?%61+%62%20%63=%64+%65%20%66g");
|
||||
BOOST_TEST_NE(u4, u4longer);
|
||||
BOOST_TEST_NE(u4longer, u4);
|
||||
}
|
||||
|
||||
// append other types of any_param_range
|
||||
{
|
||||
url u("https://a/a");
|
||||
params_ref params = u.params();
|
||||
params.append({"a+b c", "d+e f"});
|
||||
params.append({{"a+b c", "d+e f"}, {"a+b c", "d+e f"}});
|
||||
|
||||
// include all other forms of any_param_range
|
||||
|
||||
BOOST_TEST_EQ(params.size(), 3);
|
||||
BOOST_TEST_EQ(u.buffer(),
|
||||
"https://a/a?a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f");
|
||||
}
|
||||
|
||||
// when setting the encoded query, %2B should not be encoded
|
||||
{
|
||||
url u("https://a/a");
|
||||
u.set_encoded_query("a+b=a%2Bb");
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b=a%2Bb");
|
||||
}
|
||||
|
||||
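// when setting the decoded query, no space as plus is assumed (NOTE(review): the call below is set_encoded_query, same as the previous case; set_query was presumably intended - confirm)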
|
||||
{
|
||||
url u("https://a/a");
|
||||
u.set_encoded_query("a+b=a%2Bb");
|
||||
BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b=a%2Bb");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
testAll()
|
||||
@@ -900,6 +1040,7 @@ struct params_ref_test
|
||||
testObservers();
|
||||
testModifiers();
|
||||
testJavadocs();
|
||||
testSpaceAsPlus();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user