2
0
mirror of https://github.com/boostorg/url.git synced 2026-01-19 04:42:15 +00:00

fix: consistent behavior for space-as-plus option

This commit refactors all functions so they have consistent behavior for the space-as-plus encoding option.

- any_params_iter objects store and apply the appropriate option when measuring and copying
- when the option is enabled, encoding functions encode space-as-plus and plus as %2B regardless of the charset
- normalization and comparison algorithms take into consideration special query chars whose meaning changes depending on encoding
- all params_view objects created with default options enable space-as-plus encoding

fix #903
This commit is contained in:
Alan de Freitas
2025-05-14 15:26:32 -05:00
parent c5d11a9c0e
commit 64859a8fc2
22 changed files with 533 additions and 150 deletions

View File

@@ -57,8 +57,8 @@ jobs:
id: cpp-matrix
with:
compilers: |
gcc >=4.8
clang >=3.8
gcc >=4.8 <15
clang >=3.8 <20
msvc >=14.20
apple-clang *
mingw *

View File

@@ -63,8 +63,7 @@ public:
void
rewind() noexcept = 0;
// Measure and increment current element
// element.
// Measure and increment current element.
// Returns false on end of range.
// n is increased by encoded size.
// Can throw on bad percent-escape
@@ -84,19 +83,19 @@ public:
//------------------------------------------------
//
// query_iter
// query_string_iter
//
//------------------------------------------------
// A string of plain query params
struct BOOST_SYMBOL_VISIBLE
query_iter
query_string_iter
: any_params_iter
{
// ne = never empty
BOOST_URL_DECL
explicit
query_iter(
query_string_iter(
core::string_view s,
bool ne = false) noexcept;
@@ -121,16 +120,18 @@ private:
// A 1-param range allowing
// self-intersection
struct BOOST_SYMBOL_VISIBLE
param_iter
single_param_iter
: any_params_iter
{
explicit
param_iter(
param_view const&) noexcept;
single_param_iter(
param_view const&,
bool space_as_plus) noexcept;
private:
bool has_value_;
bool at_end_ = false;
bool space_as_plus_ = false;
void rewind() noexcept override;
bool measure(std::size_t&) noexcept override;
@@ -145,10 +146,15 @@ private:
struct params_iter_base
{
bool space_as_plus_ = true;
protected:
explicit params_iter_base(
bool space_as_plus) noexcept
: space_as_plus_(space_as_plus)
{}
// return encoded size
BOOST_URL_DECL
static
void
measure_impl(
std::size_t& n,
@@ -156,7 +162,6 @@ protected:
// encode to dest
BOOST_URL_DECL
static
void
copy_impl(
char*& dest,
@@ -180,9 +185,11 @@ struct params_iter
params_iter(
FwdIt first,
FwdIt last) noexcept
FwdIt last,
bool space_as_plus) noexcept
: any_params_iter(
first == last)
, params_iter_base(space_as_plus)
, it0_(first)
, it_(first)
, end_(last)
@@ -404,10 +411,10 @@ private:
template<class FwdIt>
params_iter<FwdIt>
make_params_iter(
FwdIt first, FwdIt last)
FwdIt first, FwdIt last, bool space_as_plus)
{
return params_iter<
FwdIt>(first, last);
FwdIt>(first, last, space_as_plus);
}
template<class FwdIt>

View File

@@ -31,6 +31,9 @@ constexpr char const* const empty_c_str_ = "";
// This is the private 'guts' of a
// url_view, exposed so different parts
// of the implementation can work on it.
// It stores the offsets and properties of
// a URL string stored elsewhere and pointed
// to by cs_.
struct BOOST_URL_DECL url_impl : parts_base
{
static
@@ -139,8 +142,9 @@ public:
//------------------------------------------------
// this allows a params to come from a
// url_impl or a separate core::string_view
// This class represents a query string, which
// can originate from either an url_impl object
// or an independent core::string_view.
class BOOST_URL_DECL query_ref
: private parts_base
{

View File

@@ -44,7 +44,7 @@ namespace urls {
@param s The string to measure.
@param unreserved The set of characters
@param allowed The set of characters
that is not percent-encoded.
@param opt The options for encoding. If
@@ -64,7 +64,7 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
std::size_t
encoded_size(
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt = {}) noexcept;
//------------------------------------------------
@@ -100,7 +100,7 @@ encoded_size(
@param s The string to encode.
@param unreserved The set of characters
@param allowed The set of characters
that is not percent-encoded.
@param opt The options for encoding. If
@@ -122,7 +122,7 @@ encode(
char* dest,
std::size_t size,
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt = {});
#ifndef BOOST_URL_DOCS
@@ -133,7 +133,7 @@ encode_unsafe(
char* dest,
std::size_t size,
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt);
#endif
@@ -162,7 +162,7 @@ encode_unsafe(
@param s The string to encode.
@param unreserved The set of characters
@param allowed The set of characters
that is not percent-encoded.
@param opt The options for encoding. If
@@ -186,7 +186,7 @@ template<
BOOST_URL_STRTOK_RETURN
encode(
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt = {},
StringToken&& token = {}) noexcept;

View File

@@ -32,18 +32,59 @@ struct encoding_opts
{
/** True if spaces encode to and from plus signs
This option controls whether or not
Although not prescribed by RFC 3986,
many applications decode plus signs
in URL queries as spaces. In particular,
the form-urlencoded Media Type in HTML
for submitting forms uses this convention.
This option controls whether
the PLUS character ("+") is used to
represent the SP character (" ") when
encoding or decoding.
Although not prescribed by the RFC, plus
signs are commonly treated as spaces upon
decoding when used in the query of URLs
using well known schemes such as HTTP.
When this option is `true`, both the
encoded SP ("%20") and the PLUS
character ("+") represent a space (" ")
when decoding. To represent a plus sign,
its encoded form ("%2B") is used.
The @ref encode and @ref encoded_size functions
will encode spaces as plus signs when
this option is `true`, regardless of the
allowed character set. They will also
encode plus signs as "%2B" when this
option is `true`, regardless of the
allowed character set.
Note that when a URL is normalized,
all unreserved percent-encoded characters are
replaced with their unreserved equivalents.
However, normalizing the URL query maintains
the decoded and encoded "&=+" as they are
because they might have different meanings.
This behavior is not optional because
normalization can only mitigate false
negatives, but it should eliminate
false positives.
Making it optional would allow
a false positive because there's
at least one very relevant scheme (HTTP)
where a decoded or encoded "&=+" has different
meanings and represents different resources.
The same considerations apply to URL comparison
algorithms in the library, as they treat URLs
as if they were normalized.
@par Specification
@li <a href="https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1">
application/x-www-form-urlencoded (w3.org)</a>
@li <a href="https://datatracker.ietf.org/doc/html/rfc1866#section-8.2.1">
The form-urlencoded Media Type (RFC 1866)</a>
@li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.2">
Section 6.2.2.2. Percent-Encoding Normalization (RFC 3986)</a>
*/
bool space_as_plus = false;

View File

@@ -30,14 +30,15 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
std::size_t
encoded_size(
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt) noexcept
{
/* If you get a compile error here, it
means that the value you passed does
not meet the requirements stated in
the documentation.
*/
/*
If you get a compilation error here, it
means that the value you passed does
not meet the requirements stated in
the documentation.
*/
BOOST_STATIC_ASSERT(
grammar::is_charset<CS>::value);
@@ -45,29 +46,49 @@ encoded_size(
auto it = s.data();
auto const last = it + s.size();
if(! opt.space_as_plus ||
unreserved(' '))
if (!opt.space_as_plus)
{
while(it != last)
while (it != last)
{
if(unreserved(*it))
n += 1;
char const c = *it;
if (allowed(c))
{
++n;
}
else
{
n += 3;
}
++it;
}
}
else
{
while(it != last)
// '+' is always encoded (thus
// spending 3 chars) even if
// allowed because "%2B" and
// "+" have different meanings
// when space as plus is enabled
using FNT = bool (*)(CS const& allowed, char);
FNT takes_one_char =
allowed('+') ?
(allowed(' ') ?
FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
(allowed(' ') ?
FNT([](CS const& allowed, char c){ return allowed(c); }) :
FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
while (it != last)
{
auto c = *it;
if(unreserved(c))
++n;
else if(c == ' ')
char const c = *it;
if (takes_one_char(allowed, c))
{
++n;
}
else
{
n += 3;
}
++it;
}
}
@@ -82,10 +103,10 @@ encode(
char* dest,
std::size_t size,
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt)
{
/* If you get a compile error here, it
/* If you get a compilation error here, it
means that the value you passed does
not meet the requirements stated in
the documentation.
@@ -94,7 +115,7 @@ encode(
grammar::is_charset<CS>::value);
// '%' must be reserved
BOOST_ASSERT(! unreserved('%'));
BOOST_ASSERT(!allowed('%'));
char const* const hex =
detail::hexdigs[opt.lower_case];
@@ -113,42 +134,32 @@ encode(
auto const dest0 = dest;
auto const end3 = end - 3;
if(! opt.space_as_plus)
if (!opt.space_as_plus)
{
while(it != last)
{
if(unreserved(*it))
char const c = *it;
if (allowed(c))
{
if(dest == end)
return dest - dest0;
*dest++ = *it++;
*dest++ = c;
++it;
continue;
}
if(dest > end3)
if (dest > end3)
return dest - dest0;
encode(dest, *it++);
encode(dest, c);
++it;
}
return dest - dest0;
}
else if(! unreserved(' '))
else
{
// VFALCO space is usually reserved,
// and we depend on this for an
// optimization. if this assert
// goes off we can split the loop
// below into two versions.
BOOST_ASSERT(! unreserved(' '));
while(it != last)
while (it != last)
{
if(unreserved(*it))
{
if(dest == end)
return dest - dest0;
*dest++ = *it++;
continue;
}
if(*it == ' ')
char const c = *it;
if (c == ' ')
{
if(dest == end)
return dest - dest0;
@@ -156,9 +167,20 @@ encode(
++it;
continue;
}
else if (
allowed(c) &&
c != '+')
{
if(dest == end)
return dest - dest0;
*dest++ = c;
++it;
continue;
}
if(dest > end3)
return dest - dest0;
encode(dest, *it++);
encode(dest, c);
++it;
}
}
return dest - dest0;
@@ -175,14 +197,14 @@ encode_unsafe(
char* dest,
std::size_t size,
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt)
{
BOOST_STATIC_ASSERT(
grammar::is_charset<CS>::value);
// '%' must be reserved
BOOST_ASSERT(! unreserved('%'));
BOOST_ASSERT(!allowed('%'));
auto it = s.data();
auto const last = it + s.size();
@@ -204,42 +226,44 @@ encode_unsafe(
};
auto const dest0 = dest;
if(! opt.space_as_plus)
if (!opt.space_as_plus)
{
while(it != last)
{
BOOST_ASSERT(dest != end);
if(unreserved(*it))
*dest++ = *it++;
char const c = *it;
if(allowed(c))
{
*dest++ = c;
}
else
encode(dest, *it++);
{
encode(dest, c);
}
++it;
}
}
else
{
// VFALCO space is usually reserved,
// and we depend on this for an
// optimization. if this assert
// goes off we can split the loop
// below into two versions.
BOOST_ASSERT(! unreserved(' '));
while(it != last)
{
BOOST_ASSERT(dest != end);
if(unreserved(*it))
{
*dest++ = *it++;
}
else if(*it == ' ')
char const c = *it;
if (c == ' ')
{
*dest++ = '+';
++it;
}
else if (
allowed(c) &&
c != '+')
{
*dest++ = c;
}
else
{
encode(dest, *it++);
encode(dest, c);
}
++it;
}
}
return dest - dest0;
@@ -253,7 +277,7 @@ template<
BOOST_URL_STRTOK_RETURN
encode(
core::string_view s,
CS const& unreserved,
CS const& allowed,
encoding_opts opt,
StringToken&& token) noexcept
{
@@ -261,11 +285,11 @@ encode(
grammar::is_charset<CS>::value);
auto const n = encoded_size(
s, unreserved, opt);
s, allowed, opt);
auto p = token.prepare(n);
if(n > 0)
encode_unsafe(
p, n, s, unreserved, opt);
p, n, s, allowed, opt);
return token.result();
}

View File

@@ -192,7 +192,7 @@ replace(
u_->edit_params(
from.it_, to.it_,
detail::make_params_iter(
first, last)),
first, last, opt_.space_as_plus)),
opt_);
}
@@ -212,7 +212,7 @@ assign(FwdIt first, FwdIt last,
begin().it_,
end().it_,
detail::make_params_iter(
first, last));
first, last, opt_.space_as_plus));
}
template<class FwdIt>
@@ -230,7 +230,7 @@ insert(
before.it_,
before.it_,
detail::make_params_iter(
first, last)),
first, last, opt_.space_as_plus)),
opt_);
}

View File

@@ -387,7 +387,11 @@ private:
Depending on where the object was obtained,
the strings may or may not contain percent
escapes.
escapes. Some functions and objects might
expect encoded strings in this view, while
others expect decoded strings. The caller
should be aware of the context in which
the object will be used.
For most usages, key comparisons are
case-sensitive and duplicate keys in

View File

@@ -29,11 +29,13 @@ namespace urls {
# pragma warning(disable: 4251)
#endif
/** Common functionality for containers
/** Common functionality for query parameter containers
This base class is used by the library
The library uses this base class
to provide common member functions for
containers. This cannot be instantiated
containers of query parameters.
This class should not be instantiated
directly; instead, use one of the
containers or functions:

View File

@@ -2918,9 +2918,27 @@ private:
detail::any_params_iter&&) ->
detail::params_iter_impl;
// Decodes any unnecessary percent-escapes
// and ensures hexadecimals are uppercase.
// The encoding of ignored characters is
// preserved.
template
<class AllowedCharSet,
class IgnoredCharSet>
void
normalize_octets_impl(
int,
AllowedCharSet const& allowed,
IgnoredCharSet const& ignored,
op_t&) noexcept;
template<class CharSet>
void normalize_octets_impl(int,
CharSet const& allowed, op_t&) noexcept;
void
normalize_octets_impl(
int,
CharSet const& allowed,
op_t&) noexcept;
void decoded_to_lower_impl(int id) noexcept;
void to_lower_impl(int id) noexcept;
};

View File

@@ -44,8 +44,8 @@ any_params_iter::
//
//------------------------------------------------
query_iter::
query_iter(
query_string_iter::
query_string_iter(
core::string_view s,
bool ne) noexcept
: any_params_iter(
@@ -55,7 +55,7 @@ query_iter(
}
void
query_iter::
query_string_iter::
rewind() noexcept
{
if(empty)
@@ -81,7 +81,7 @@ rewind() noexcept
}
bool
query_iter::
query_string_iter::
measure(
std::size_t& n) noexcept
{
@@ -101,7 +101,7 @@ measure(
}
void
query_iter::
query_string_iter::
copy(
char*& dest,
char const* end) noexcept
@@ -122,7 +122,7 @@ copy(
}
void
query_iter::
query_string_iter::
increment() noexcept
{
p_ += n_;
@@ -146,32 +146,34 @@ increment() noexcept
//
//------------------------------------------------
param_iter::
param_iter(
param_view const& p) noexcept
single_param_iter::
single_param_iter(
param_view const& p,
bool space_as_plus) noexcept
: any_params_iter(
false,
p.key,
p.value)
, has_value_(p.has_value)
, space_as_plus_(space_as_plus)
{
}
void
param_iter::
single_param_iter::
rewind() noexcept
{
at_end_ = false;
}
bool
param_iter::
single_param_iter::
measure(std::size_t& n) noexcept
{
if(at_end_)
return false;
encoding_opts opt;
opt.space_as_plus = false;
opt.space_as_plus = space_as_plus_;
n += encoded_size(
s0,
detail::param_key_chars,
@@ -189,21 +191,21 @@ measure(std::size_t& n) noexcept
}
void
param_iter::
single_param_iter::
copy(
char*& dest,
char const* end) noexcept
{
BOOST_ASSERT(! at_end_);
encoding_opts opt;
opt.space_as_plus = false;
opt.space_as_plus = space_as_plus_;
dest += encode(
dest,
end - dest,
s0,
detail::param_key_chars,
opt);
if(has_value_)
if (has_value_)
{
*dest++ = '=';
dest += encode(
@@ -228,7 +230,7 @@ measure_impl(
param_view const& p) noexcept
{
encoding_opts opt;
opt.space_as_plus = false;
opt.space_as_plus = space_as_plus_;
n += encoded_size(
p.key,
detail::param_key_chars,
@@ -251,7 +253,7 @@ copy_impl(
param_view const& p) noexcept
{
encoding_opts opt;
opt.space_as_plus = false;
opt.space_as_plus = space_as_plus_;
dest += encode(
dest,
end - dest,

View File

@@ -14,6 +14,7 @@
#include "decode.hpp"
#include <boost/url/segments_encoded_view.hpp>
#include <boost/url/grammar/ci_string.hpp>
#include <boost/url/grammar/lut_chars.hpp>
#include <boost/assert.hpp>
#include <boost/core/ignore_unused.hpp>
#include <cstring>
@@ -74,6 +75,60 @@ compare_encoded(
return 1;
}
// Three-way comparison of two encoded query
// strings, octet by octet after decoding.
// Returns -1, 0 or 1.
//
// The characters "&=+" are treated specially:
// even when both sides decode to the same one
// of these characters, they only compare equal
// if both sides spell it the same way (both
// literal or both percent-encoded).
int
compare_encoded_query(
    core::string_view lhs,
    core::string_view rhs) noexcept
{
    // Characters whose meaning in a query
    // depends on how they are spelled.
    static constexpr
    grammar::lut_chars
    special_query_chars = "&=+";

    std::size_t lhs_size = 0;
    std::size_t rhs_size = 0;
    char lhs_ch = 0;
    char rhs_ch = 0;
    while(!(lhs.empty() || rhs.empty()))
    {
        // Remember how each side spells its
        // next octet before consuming it.
        bool const lhs_literal = lhs.front() != '%';
        bool const rhs_literal = rhs.front() != '%';
        pop_encoded_front(lhs, lhs_ch, lhs_size);
        pop_encoded_front(rhs, rhs_ch, rhs_size);

        // Different decoded chars settle the
        // comparison immediately.
        if (lhs_ch != rhs_ch)
            return lhs_ch < rhs_ch ? -1 : 1;

        // Same decoded char: a special query
        // char spelled differently on each side
        // is still a mismatch. The side that was
        // percent-encoded started with '%', which
        // sorts below the literal "&=+" chars.
        if (special_query_chars(lhs_ch) &&
            lhs_literal != rhs_literal)
        {
            return rhs_literal ? -1 : 1;
        }
    }

    // One side is exhausted: account for what
    // remains and order by total decoded size.
    lhs_size += detail::decode_bytes_unsafe(lhs);
    rhs_size += detail::decode_bytes_unsafe(rhs);
    if (lhs_size == rhs_size)
        return 0;
    return lhs_size < rhs_size ? -1 : 1;
}
void
digest_encoded(
core::string_view s,

View File

@@ -80,6 +80,16 @@ compare_encoded(
core::string_view lhs,
core::string_view rhs) noexcept;
// compare two core::string_views as if they are both
// percent-decoded, except that the special query
// chars ("&=+") only compare equal when both sides
// spell them the same way (both literal or both
// percent-encoded), because the two spellings
// have different meanings in a query
int
compare_encoded_query(
core::string_view lhs,
core::string_view rhs) noexcept;
// digest a core::string_view as if it were
// percent-decoded
void

View File

@@ -180,7 +180,7 @@ params_base::
end() const noexcept ->
iterator
{
return iterator(ref_, opt_, 0);
return {ref_, opt_, 0};
}
//------------------------------------------------

View File

@@ -239,7 +239,7 @@ erase(
return u_->edit_params(
first.it_,
last.it_,
detail::query_iter(s));
detail::query_string_iter(s));
}
} // urls

View File

@@ -36,7 +36,7 @@ params_encoded_view::
operator
params_view() const noexcept
{
return { ref_, encoding_opts{} };
return { ref_, encoding_opts{ true, false, false} };
}
} // urls

View File

@@ -66,12 +66,12 @@ insert(
param_view const& p) ->
iterator
{
return iterator(
return {
u_->edit_params(
before.it_,
before.it_,
detail::param_iter(p)),
opt_);
detail::single_param_iter(p, opt_.space_as_plus)),
opt_};
}
auto
@@ -130,7 +130,7 @@ replace(
u_->edit_params(
pos.it_,
std::next(pos).it_,
detail::param_iter(p)),
detail::single_param_iter(p, opt_.space_as_plus)),
opt_);
}
@@ -232,7 +232,7 @@ erase(
u_->edit_params(
first.it_,
last.it_,
detail::query_iter(s)),
detail::query_string_iter(s)),
opt_);
}

View File

@@ -18,6 +18,20 @@ namespace boost {
namespace urls {
namespace detail {
// A character set with no members: the
// predicate answers false for every char.
struct empty_chars_t
{
    constexpr bool
    operator()(char) const noexcept
    {
        return false;
    }
};

// Ready-made instance of the empty set.
constexpr empty_chars_t empty_chars{};
constexpr
auto
user_chars =
@@ -58,6 +72,11 @@ auto
query_chars =
pchars + '/' + '?' + '[' + ']';
// Query characters ("&=+") passed as the
// "ignored" set when normalizing the query,
// so their existing encoding is preserved.
constexpr
grammar::lut_chars
query_ignore_chars =
"&=+";
constexpr
auto
param_key_chars = pchars

View File

@@ -1328,7 +1328,7 @@ set_query(
edit_params(
detail::params_iter_impl(impl_),
detail::params_iter_impl(impl_, 0),
detail::query_iter(s, true));
detail::query_string_iter(s, true));
return *this;
}
@@ -1401,7 +1401,7 @@ params_ref
url_base::
params(encoding_opts opt) noexcept
{
return params_ref(*this, opt);
return {*this, opt};
}
params_encoded_ref
@@ -1635,12 +1635,15 @@ resolve(
//
//------------------------------------------------
template <class Charset>
template <
class AllowedCharset,
class IgnoredCharset>
void
url_base::
normalize_octets_impl(
int id,
Charset const& allowed,
AllowedCharset const& allowed,
IgnoredCharset const& ignored,
op_t& op) noexcept
{
char* it = s_ + impl_.offset(id);
@@ -1660,7 +1663,8 @@ normalize_octets_impl(
// decode unreserved octets
d = detail::decode_one(it + 1);
if (allowed(d))
if (allowed(d) &&
!ignored(d))
{
*dest = d;
it += 3;
@@ -1683,6 +1687,18 @@ normalize_octets_impl(
}
}
// Convenience overload for components with
// no characters to ignore: forwards to the
// overload taking an ignored set, passing
// the empty character set.
template<class CharSet>
void
url_base::
normalize_octets_impl(
    int idx,
    CharSet const& allowed,
    op_t& op) noexcept
{
    normalize_octets_impl(
        idx,
        allowed,
        detail::empty_chars,
        op);
}
url_base&
url_base::
normalize_scheme()
@@ -1884,7 +1900,10 @@ normalize_query()
{
op_t op(*this);
normalize_octets_impl(
id_query, detail::query_chars, op);
id_query,
detail::query_chars,
detail::query_ignore_chars,
op);
return *this;
}
@@ -2647,20 +2666,16 @@ edit_params(
auto pos1 = pos0 + it1.pos;
pos0 = pos0 + it0.pos;
// Iterator doesn't belong to this url
// Iterators belong to this url
BOOST_ASSERT(it0.ref.alias_of(impl_));
// Iterator doesn't belong to this url
BOOST_ASSERT(it1.ref.alias_of(impl_));
// Iterator is in the wrong order
// Iterators are in the right order
BOOST_ASSERT(it0.index <= it1.index);
// Iterator is out of range
// Iterators are within range
BOOST_ASSERT(it0.index <= impl_.nparam_);
BOOST_ASSERT(pos0 <= impl_.offset(id_frag));
// Iterator is out of range
BOOST_ASSERT(it1.index <= impl_.nparam_);
BOOST_ASSERT(pos1 <= impl_.offset(id_frag));

View File

@@ -701,7 +701,7 @@ compare(const url_view_base& other) const noexcept
if (has_query())
{
comp = detail::compare_encoded(
comp = detail::compare_encoded_query(
encoded_query(),
other.encoded_query());
if ( comp != 0 )

View File

@@ -26,6 +26,20 @@
namespace boost {
namespace urls {
// Test charset: the unreserved characters,
// optionally extended with '+' and/or ' '
// as selected by the template flags.
template <bool allow_plus, bool allow_space>
struct space_as_plus_test_chars
{
    constexpr
    bool
    operator()(char c) const noexcept
    {
        return
            unreserved_chars(c) ||
            (allow_space && c == ' ') ||
            (allow_plus && c == '+');
    }
};
class encode_test
{
public:
@@ -133,8 +147,35 @@ public:
" ", test_chars{}, opt, {}) == "+");
BOOST_TEST(encode(
"A", test_chars{}, opt, {}) == "A");
BOOST_TEST(encode(
" A+", test_chars{}, opt, {}) == "+A+");
BOOST_TEST_EQ(encode(
" A+", test_chars{}, opt, {}), "+A%2B");
}
// optimization of space-as-plus when the charset
// already includes or excludes plus or space
{
encoding_opts opt;
opt.space_as_plus = true;
BOOST_TEST_EQ(
encode(
"a +",
space_as_plus_test_chars<true, true>{},
opt), "a+%2B");
BOOST_TEST_EQ(
encode(
"a +",
space_as_plus_test_chars<true, false>{},
opt), "a+%2B");
BOOST_TEST_EQ(
encode(
"a +",
space_as_plus_test_chars<false, true>{},
opt), "a+%2B");
BOOST_TEST_EQ(
encode(
"a +",
space_as_plus_test_chars<false, false>{},
opt), "a+%2B");
}
}

View File

@@ -120,8 +120,9 @@ struct params_ref_test
check(*r, init);
}
// check that modification produces
// the string and correct sequence
// check whether modifying s0 via
// f produces a URL with the query
// string s1 and params init
static
void
check(
@@ -153,6 +154,9 @@ struct params_ref_test
}
}
// check whether modifying s0 via
// f1 and f2 produces a URL with
// the query string s1 and params init
static
void
check(
@@ -371,7 +375,7 @@ struct params_ref_test
{
assign(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
};
check(f, g, "", "first&last=&full=John%20Doe",
check(f, g, "", "first&last=&full=John+Doe",
{ {"first",no_value}, {"last",""}, {"full","John Doe"} });
}
{
@@ -430,11 +434,11 @@ struct params_ref_test
{
append(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
};
check(f, g, "", "first&last=&full=John%20Doe",
check(f, g, "", "first&last=&full=John+Doe",
{ {"first",no_value}, {"last",""}, {"full","John Doe"} });
check(f, g, "?", "&first&last=&full=John%20Doe",
check(f, g, "?", "&first&last=&full=John+Doe",
{ {"",no_value}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
check(f, g, "?key=value", "key=value&first&last=&full=John%20Doe",
check(f, g, "?key=value", "key=value&first&last=&full=John+Doe",
{ {"key","value"}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
}
{
@@ -892,6 +896,142 @@ struct params_ref_test
}
}
// Regression tests for issue #903: consistent
// handling of the space-as-plus option when
// appending params, normalizing, comparing,
// and setting the query.
static
void
testSpaceAsPlus()
{
    // issue #903
    {
        // "&=?" in key/values
        {
            // In the general case, normalized URLs
            // always decode unreserved chars and encode
            // reserved chars.
            // However, normalizing the URL query should
            // maintain the decoded and encoded "&=+"
            // because they have different meanings
            // in a query.
            // This isn't optional either because
            // normalization can only mitigate false
            // negatives, but it should eliminate
            // false positives.
            // Making it optional would be allowing
            // a false positive because there's
            // at least one very relevant scheme (HTTP)
            // where decoded/encoded "&=+" has different
            // meanings and represent different resources.
            urls::url u("https://a/a");
            params_ref params = u.params();
            params.append({"&=?", "&=?"});
            auto it = params.begin();
            const auto& param = *it;
            BOOST_TEST_EQ(param.key, "&=?");
            BOOST_TEST_EQ(param.value, "&=?");
            BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
            u.normalize_query();
            BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
            u.normalize();
            BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
        }

        // opts.space_as_plus = true
        {
            // The params_ref object represents the decoded
            // query parameters, so appending "+" represents
            // a value that should be decoded as "+" (%2B)
            // and not an encoded "+" that would be decoded
            // as space.
            urls::url u("https://a/a");
            encoding_opts opts;
            opts.space_as_plus = true;
            params_ref params = u.params(opts);
            params.append({"a+b c", "d+e f"});
            auto it = params.begin();
            const auto& param = *it;
            BOOST_TEST_EQ(param.key, "a+b c");
            BOOST_TEST_EQ(param.value, "d+e f");
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
            u.normalize_query();
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
        }

        // opts.space_as_plus = false
        {
            // The params_ref object represents the decoded
            // query parameters without any special treatment
            // for "+" and space. "+" can remain as is
            // and space is represented as "%20".
            urls::url u("https://a/a");
            encoding_opts opts;
            opts.space_as_plus = false;
            params_ref params = u.params(opts);
            params.append({"a+b c", "d+e f"});
            auto it = params.begin();
            const auto& param = *it;
            BOOST_TEST_EQ(param.key, "a+b c");
            BOOST_TEST_EQ(param.value, "d+e f");
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
            u.normalize_query();
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
        }

        // comparisons
        {
            // We should not consider two URLs equivalent
            // if the query differs in the way "&=+" is encoded.

            // u1: no space as plus
            url u1("https://a/a?%26%3D?=%26=?&a+b%20c=d+e%20f");
            // u1e: no space as plus, non-separators encoded
            url u1e("https://a/a?%26%3D?=%26=?&%61+%62%20%63=%64+%65%20%66");
            // u2: space as plus
            url u2("https://a/a?%26%3D?=%26=?&a%2Bb+c=d%2Be+f");
            // u2e: space as plus, non-separators encoded
            url u2e("https://a/a?%26%3D?=%26=?&%61%2B%62+%63=%64%2Be+%66");
            // u3: separators decoded too early
            url u3("https://a/a?&=?=&=?&a%2Bb+c=d%2Be+f");
            BOOST_TEST_EQ(u1, u1e);
            BOOST_TEST_NE(u1, u2);
            BOOST_TEST_EQ(u2, u2e);
            BOOST_TEST_NE(u2, u3);
            BOOST_TEST_NE(u1, u3);

            // queries that differ by size
            url u4("https://a/a?a+b%20c=d+e%20f");
            url u4longer("https://a/a?%61+%62%20%63=%64+%65%20%66g");
            BOOST_TEST_NE(u4, u4longer);
            BOOST_TEST_NE(u4longer, u4);
        }

        // append other types of any_param_range
        {
            url u("https://a/a");
            params_ref params = u.params();
            // single param, then an initializer
            // list of params
            params.append({"a+b c", "d+e f"});
            params.append({{"a+b c", "d+e f"}, {"a+b c", "d+e f"}});
            BOOST_TEST_EQ(params.size(), 3);
            BOOST_TEST_EQ(u.buffer(),
                "https://a/a?a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f");
        }

        // when setting the encoded query, %2B should not be encoded
        {
            url u("https://a/a");
            u.set_encoded_query("a+b=a%2Bb");
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b=a%2Bb");
        }

        // when setting the decoded query, no space as plus is assumed:
        // "+" is kept as a literal plus and space becomes "%20".
        // This must go through set_query (the plain-string setter);
        // calling set_encoded_query here would only repeat the
        // previous case.
        {
            url u("https://a/a");
            u.set_query("a+b c=d+e f");
            BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
        }
    }
}
static
void
testAll()
@@ -900,6 +1040,7 @@ struct params_ref_test
testObservers();
testModifiers();
testJavadocs();
testSpaceAsPlus();
}
void