fix: consistent behavior for space-as-plus option

This commit refactors all functions so they have consistent behavior for the space-as-plus encoding option: (1) any_params_iter objects store and apply the appropriate option when measuring and copying; (2) when the option is enabled, encoding functions encode spaces as plus signs and plus signs as %2B regardless of the charset; (3) normalization and comparison algorithms take into consideration the special query chars whose meaning changes depending on encoding; (4) all params_view objects created with default options enable space-as-plus encoding. Fixes #903.
2026-01-19 04:42:15 +00:00 · 2025-05-14 15:26:32 -05:00
parent c5d11a9c0e
commit 64859a8fc2
22 changed files with 533 additions and 150 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,8 +57,8 @@ jobs:
        id: cpp-matrix
        with:
          compilers: |
-            gcc >=4.8
-            clang >=3.8
+            gcc >=4.8 <15
+            clang >=3.8 <20
            msvc >=14.20
            apple-clang *
            mingw *
--- a/include/boost/url/detail/any_params_iter.hpp
+++ b/include/boost/url/detail/any_params_iter.hpp
@@ -63,8 +63,7 @@ public:
    void
    rewind() noexcept = 0;

-    // Measure and increment current element
-    // element.
+    // Measure and increment current element.
    // Returns false on end of range.
    // n is increased by encoded size.
    // Can throw on bad percent-escape
@@ -84,19 +83,19 @@ public:

 //------------------------------------------------
 //
-// query_iter
+// query_string_iter
 //
 //------------------------------------------------

 // A string of plain query params
 struct BOOST_SYMBOL_VISIBLE
-    query_iter
+    query_string_iter
    : any_params_iter
 {
    // ne = never empty
    BOOST_URL_DECL
    explicit
-    query_iter(
+    query_string_iter(
        core::string_view s,
        bool ne = false) noexcept;

@@ -121,16 +120,18 @@ private:
 // A 1-param range allowing
 // self-intersection
 struct BOOST_SYMBOL_VISIBLE
-    param_iter
+    single_param_iter
    : any_params_iter
 {
    explicit
-    param_iter(
-        param_view const&) noexcept;
+    single_param_iter(
+        param_view const&,
+        bool space_as_plus) noexcept;

 private:
    bool has_value_;
    bool at_end_ = false;
+    bool space_as_plus_ = false;

    void rewind() noexcept override;
    bool measure(std::size_t&) noexcept override;
@@ -145,10 +146,15 @@ private:

 struct params_iter_base
 {
+    bool space_as_plus_ = true;
 protected:
+    explicit params_iter_base(
+        bool space_as_plus) noexcept
+        : space_as_plus_(space_as_plus)
+        {}
+
    // return encoded size
    BOOST_URL_DECL
-    static
    void
    measure_impl(
        std::size_t& n,
@@ -156,7 +162,6 @@ protected:

    // encode to dest
    BOOST_URL_DECL
-    static
    void
    copy_impl(
        char*& dest,
@@ -180,9 +185,11 @@ struct params_iter

    params_iter(
        FwdIt first,
-        FwdIt last) noexcept
+        FwdIt last,
+        bool space_as_plus) noexcept
        : any_params_iter(
            first == last)
+        , params_iter_base(space_as_plus)
        , it0_(first)
        , it_(first)
        , end_(last)
@@ -404,10 +411,10 @@ private:
 template<class FwdIt>
 params_iter<FwdIt>
 make_params_iter(
-    FwdIt first, FwdIt last)
+    FwdIt first, FwdIt last, bool space_as_plus)
 {
    return params_iter<
-        FwdIt>(first, last);
+        FwdIt>(first, last, space_as_plus);
 }

 template<class FwdIt>
--- a/include/boost/url/detail/url_impl.hpp
+++ b/include/boost/url/detail/url_impl.hpp
@@ -31,6 +31,9 @@ constexpr char const* const empty_c_str_ = "";
 // This is the private 'guts' of a
 // url_view, exposed so different parts
 // of the implementation can work on it.
+// It stores the offsets and properties of
+// a URL string stored elsewhere and pointed
+// to by cs_.
 struct BOOST_URL_DECL url_impl : parts_base
 {
    static
@@ -139,8 +142,9 @@ public:

 //------------------------------------------------

-// this allows a params to come from a
-// url_impl or a separate core::string_view
+// This class represents a query string, which
+// can originate from either an url_impl object
+// or an independent core::string_view.
 class BOOST_URL_DECL query_ref
    : private parts_base
 {
--- a/include/boost/url/encode.hpp
+++ b/include/boost/url/encode.hpp
@@ -44,7 +44,7 @@ namespace urls {

    @param s The string to measure.

-    @param unreserved The set of characters
+    @param allowed The set of characters
    that is not percent-encoded.

    @param opt The options for encoding. If
@@ -64,7 +64,7 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
 std::size_t
 encoded_size(
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt = {}) noexcept;

 //------------------------------------------------
@@ -100,7 +100,7 @@ encoded_size(

    @param s The string to encode.

-    @param unreserved The set of characters
+    @param allowed The set of characters
    that is not percent-encoded.

    @param opt The options for encoding. If
@@ -122,7 +122,7 @@ encode(
    char* dest,
    std::size_t size,
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt = {});

 #ifndef BOOST_URL_DOCS
@@ -133,7 +133,7 @@ encode_unsafe(
    char* dest,
    std::size_t size,
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt);
 #endif

@@ -162,7 +162,7 @@ encode_unsafe(

    @param s The string to encode.

-    @param unreserved The set of characters
+    @param allowed The set of characters
    that is not percent-encoded.

    @param opt The options for encoding. If
@@ -186,7 +186,7 @@ template<
 BOOST_URL_STRTOK_RETURN
 encode(
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt = {},
    StringToken&& token = {}) noexcept;

--- a/include/boost/url/encoding_opts.hpp
+++ b/include/boost/url/encoding_opts.hpp
@@ -32,18 +32,59 @@ struct encoding_opts
 {
    /** True if spaces encode to and from plus signs

-        This option controls whether or not
+        Although not prescribed by RFC 3986,
+        many applications decode plus signs
+        in URL queries as spaces. In particular,
+        the form-urlencoded Media Type in HTML
+        for submitting forms uses this convention.
+
+        This option controls whether
        the PLUS character ("+") is used to
        represent the SP character (" ") when
        encoding or decoding.
-        Although not prescribed by the RFC, plus
-        signs are commonly treated as spaces upon
-        decoding when used in the query of URLs
-        using well known schemes such as HTTP.
+
+        When this option is `true`, both the
+        encoded SP ("%20") and the PLUS
+        character ("+") represent a space (" ")
+        when decoding. To represent a plus sign,
+        its encoded form ("%2B") is used.
+
+        The @ref encode and @ref encoded_size functions
+        will encode spaces as plus signs when
+        this option is `true`, regardless of the
+        allowed character set. They will also
+        encode plus signs as "%2B" when this
+        option is `true`, regardless of the
+        allowed character set.
+
+        Note that when a URL is normalized,
+        all unreserved percent-encoded characters are
+        replaced with their unreserved equivalents.
+        However, normalizing the URL query maintains
+        the decoded and encoded "&=+" as they are
+        because they might have different meanings.
+
+        This behavior is not optional because
+        normalization can only mitigate false
+        negatives, but it should eliminate
+        false positives.
+        Making it optional would allow
+        a false positive because there's
+        at least one very relevant scheme (HTTP)
+        where a decoded or encoded "&=+" has different
+        meanings and represents different resources.
+
+        The same considerations apply to URL comparison
+        algorithms in the library, as they treat URLs
+        as if they were normalized.

        @par Specification
        @li <a href="https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1">
            application/x-www-form-urlencoded (w3.org)</a>
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc1866#section-8.2.1">
+            The form-urlencoded Media Type (RFC 1866)</a>
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.2">
+            Section 6.2.2.2. Percent-Encoding Normalization (RFC 3986)</a>
    */
    bool space_as_plus = false;

--- a/include/boost/url/impl/encode.hpp
+++ b/include/boost/url/impl/encode.hpp
@@ -30,14 +30,15 @@ template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
 std::size_t
 encoded_size(
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt) noexcept
 {
-/*  If you get a compile error here, it
-    means that the value you passed does
-    not meet the requirements stated in
-    the documentation.
-*/
+    /*
+        If you get a compilation error here, it
+        means that the value you passed does
+        not meet the requirements stated in
+        the documentation.
+    */
    BOOST_STATIC_ASSERT(
        grammar::is_charset<CS>::value);

@@ -45,29 +46,49 @@ encoded_size(
    auto it = s.data();
    auto const last = it + s.size();

-    if(! opt.space_as_plus ||
-        unreserved(' '))
+    if (!opt.space_as_plus)
    {
-        while(it != last)
+        while (it != last)
        {
-            if(unreserved(*it))
-                n += 1;
+            char const c = *it;
+            if (allowed(c))
+            {
+                ++n;
+            }
            else
+            {
                n += 3;
+            }
            ++it;
        }
    }
    else
    {
-        while(it != last)
+        // '+' is always encoded (thus
+        // spending 3 chars) even if
+        // allowed because "%2B" and
+        // "+" have different meanings
+        // when space as plus is enabled
+        using FNT = bool (*)(CS const& allowed, char);
+        FNT takes_one_char =
+            allowed('+') ?
+                (allowed(' ') ?
+                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
+                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
+                (allowed(' ') ?
+                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
+                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
+        while (it != last)
        {
-            auto c = *it;
-            if(unreserved(c))
-                ++n;
-            else if(c == ' ')
+            char const c = *it;
+            if (takes_one_char(allowed, c))
+            {
                ++n;
+            }
            else
+            {
                n += 3;
+            }
            ++it;
        }
    }
@@ -82,10 +103,10 @@ encode(
    char* dest,
    std::size_t size,
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt)
 {
-/*  If you get a compile error here, it
+/*  If you get a compilation error here, it
    means that the value you passed does
    not meet the requirements stated in
    the documentation.
@@ -94,7 +115,7 @@ encode(
        grammar::is_charset<CS>::value);

    // '%' must be reserved
-    BOOST_ASSERT(! unreserved('%'));
+    BOOST_ASSERT(!allowed('%'));

    char const* const hex =
        detail::hexdigs[opt.lower_case];
@@ -113,42 +134,32 @@ encode(
    auto const dest0 = dest;
    auto const end3 = end - 3;

-    if(! opt.space_as_plus)
+    if (!opt.space_as_plus)
    {
        while(it != last)
        {
-            if(unreserved(*it))
+            char const c = *it;
+            if (allowed(c))
            {
                if(dest == end)
                    return dest - dest0;
-                *dest++ = *it++;
+                *dest++ = c;
+                ++it;
                continue;
            }
-            if(dest > end3)
+            if (dest > end3)
                return dest - dest0;
-            encode(dest, *it++);
+            encode(dest, c);
+            ++it;
        }
        return dest - dest0;
    }
-    else if(! unreserved(' '))
+    else
    {
-        // VFALCO space is usually reserved,
-        // and we depend on this for an
-        // optimization. if this assert
-        // goes off we can split the loop
-        // below into two versions.
-        BOOST_ASSERT(! unreserved(' '));
-
-        while(it != last)
+        while (it != last)
        {
-            if(unreserved(*it))
-            {
-                if(dest == end)
-                    return dest - dest0;
-                *dest++ = *it++;
-                continue;
-            }
-            if(*it == ' ')
+            char const c = *it;
+            if (c == ' ')
            {
                if(dest == end)
                    return dest - dest0;
@@ -156,9 +167,20 @@ encode(
                ++it;
                continue;
            }
+            else if (
+                allowed(c) &&
+                c != '+')
+            {
+                if(dest == end)
+                    return dest - dest0;
+                *dest++ = c;
+                ++it;
+                continue;
+            }
            if(dest > end3)
                return dest - dest0;
-            encode(dest, *it++);
+            encode(dest, c);
+            ++it;
        }
    }
    return dest - dest0;
@@ -175,14 +197,14 @@ encode_unsafe(
    char* dest,
    std::size_t size,
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt)
 {
    BOOST_STATIC_ASSERT(
        grammar::is_charset<CS>::value);

    // '%' must be reserved
-    BOOST_ASSERT(! unreserved('%'));
+    BOOST_ASSERT(!allowed('%'));

    auto it = s.data();
    auto const last = it + s.size();
@@ -204,42 +226,44 @@ encode_unsafe(
    };

    auto const dest0 = dest;
-    if(! opt.space_as_plus)
+    if (!opt.space_as_plus)
    {
        while(it != last)
        {
            BOOST_ASSERT(dest != end);
-            if(unreserved(*it))
-                *dest++ = *it++;
+            char const c = *it;
+            if(allowed(c))
+            {
+                *dest++ = c;
+            }
            else
-                encode(dest, *it++);
+            {
+                encode(dest, c);
+            }
+            ++it;
        }
    }
    else
    {
-        // VFALCO space is usually reserved,
-        // and we depend on this for an
-        // optimization. if this assert
-        // goes off we can split the loop
-        // below into two versions.
-        BOOST_ASSERT(! unreserved(' '));
-
        while(it != last)
        {
            BOOST_ASSERT(dest != end);
-            if(unreserved(*it))
-            {
-                *dest++ = *it++;
-            }
-            else if(*it == ' ')
+            char const c = *it;
+            if (c == ' ')
            {
                *dest++ = '+';
-                ++it;
+            }
+            else if (
+                allowed(c) &&
+                c != '+')
+            {
+                *dest++ = c;
            }
            else
            {
-                encode(dest, *it++);
+                encode(dest, c);
            }
+            ++it;
        }
    }
    return dest - dest0;
@@ -253,7 +277,7 @@ template<
 BOOST_URL_STRTOK_RETURN
 encode(
    core::string_view s,
-    CS const& unreserved,
+    CS const& allowed,
    encoding_opts opt,
    StringToken&& token) noexcept
 {
@@ -261,11 +285,11 @@ encode(
        grammar::is_charset<CS>::value);

    auto const n = encoded_size(
-        s, unreserved, opt);
+        s, allowed, opt);
    auto p = token.prepare(n);
    if(n > 0)
        encode_unsafe(
-            p, n, s, unreserved, opt);
+            p, n, s, allowed, opt);
    return token.result();
 }

--- a/include/boost/url/impl/params_ref.hpp
+++ b/include/boost/url/impl/params_ref.hpp
@@ -192,7 +192,7 @@ replace(
        u_->edit_params(
            from.it_, to.it_,
            detail::make_params_iter(
-                first, last)),
+                first, last, opt_.space_as_plus)),
        opt_);
 }

@@ -212,7 +212,7 @@ assign(FwdIt first, FwdIt last,
        begin().it_,
        end().it_,
        detail::make_params_iter(
-            first, last));
+            first, last, opt_.space_as_plus));
 }

 template<class FwdIt>
@@ -230,7 +230,7 @@ insert(
            before.it_,
            before.it_,
            detail::make_params_iter(
-                first, last)),
+                first, last, opt_.space_as_plus)),
        opt_);
 }

--- a/include/boost/url/param.hpp
+++ b/include/boost/url/param.hpp
@@ -387,7 +387,11 @@ private:

    Depending on where the object was obtained,
    the strings may or may not contain percent
-    escapes.
+    escapes. Some functions and objects might
+    expect encoded strings in this view, while
+    others expect decoded strings. The caller
+    should be aware of the context in which
+    the object will be used.

    For most usages, key comparisons are
    case-sensitive and duplicate keys in
--- a/include/boost/url/params_base.hpp
+++ b/include/boost/url/params_base.hpp
@@ -29,11 +29,13 @@ namespace urls {
 #   pragma warning(disable: 4251)
 #endif

-/** Common functionality for containers
+/** Common functionality for query parameter containers

-    This base class is used by the library
+    The library uses this base class
    to provide common member functions for
-    containers. This cannot be instantiated
+    containers of query parameters.
+
+    This class should not be instantiated
    directly; Instead, use one of the
    containers or functions:

--- a/include/boost/url/url_base.hpp
+++ b/include/boost/url/url_base.hpp
@@ -2918,9 +2918,27 @@ private:
        detail::any_params_iter&&) ->
            detail::params_iter_impl;

+    // Decodes any unnecessary percent-escapes
+    // and ensures hexadecimals are uppercase.
+    // The encoding of ignored characters is
+    // preserved.
+    template
+        <class AllowedCharSet,
+         class IgnoredCharSet>
+    void
+    normalize_octets_impl(
+        int,
+        AllowedCharSet const& allowed,
+        IgnoredCharSet const& ignored,
+        op_t&) noexcept;
+
    template<class CharSet>
-    void normalize_octets_impl(int,
-        CharSet const& allowed, op_t&) noexcept;
+    void
+    normalize_octets_impl(
+        int,
+        CharSet const& allowed,
+        op_t&) noexcept;
+
    void decoded_to_lower_impl(int id) noexcept;
    void to_lower_impl(int id) noexcept;
 };
--- a/src/detail/any_params_iter.cpp
+++ b/src/detail/any_params_iter.cpp
@@ -44,8 +44,8 @@ any_params_iter::
 //
 //------------------------------------------------

-query_iter::
-query_iter(
+query_string_iter::
+query_string_iter(
    core::string_view s,
    bool ne) noexcept
    : any_params_iter(
@@ -55,7 +55,7 @@ query_iter(
 }

 void
-query_iter::
+query_string_iter::
 rewind() noexcept
 {
    if(empty)
@@ -81,7 +81,7 @@ rewind() noexcept
 }

 bool
-query_iter::
+query_string_iter::
 measure(
    std::size_t& n) noexcept
 {
@@ -101,7 +101,7 @@ measure(
 }

 void
-query_iter::
+query_string_iter::
 copy(
    char*& dest,
    char const* end) noexcept
@@ -122,7 +122,7 @@ copy(
 }

 void
-query_iter::
+query_string_iter::
 increment() noexcept
 {
    p_ += n_;
@@ -146,32 +146,34 @@ increment() noexcept
 //
 //------------------------------------------------

-param_iter::
-param_iter(
-    param_view const& p) noexcept
+single_param_iter::
+single_param_iter(
+    param_view const& p,
+    bool space_as_plus) noexcept
    : any_params_iter(
        false,
        p.key,
        p.value)
    , has_value_(p.has_value)
+    , space_as_plus_(space_as_plus)
 {
 }

 void
-param_iter::
+single_param_iter::
 rewind() noexcept
 {
    at_end_ = false;
 }

 bool
-param_iter::
+single_param_iter::
 measure(std::size_t& n) noexcept
 {
    if(at_end_)
        return false;
    encoding_opts opt;
-    opt.space_as_plus = false;
+    opt.space_as_plus = space_as_plus_;
    n += encoded_size(
        s0,
        detail::param_key_chars,
@@ -189,21 +191,21 @@ measure(std::size_t& n) noexcept
 }

 void
-param_iter::
+single_param_iter::
 copy(
    char*& dest,
    char const* end) noexcept
 {
    BOOST_ASSERT(! at_end_);
    encoding_opts opt;
-    opt.space_as_plus = false;
+    opt.space_as_plus = space_as_plus_;
    dest += encode(
        dest,
        end - dest,
        s0,
        detail::param_key_chars,
        opt);
-    if(has_value_)
+    if (has_value_)
    {
        *dest++ = '=';
        dest += encode(
@@ -228,7 +230,7 @@ measure_impl(
    param_view const& p) noexcept
 {
    encoding_opts opt;
-    opt.space_as_plus = false;
+    opt.space_as_plus = space_as_plus_;
    n += encoded_size(
        p.key,
        detail::param_key_chars,
@@ -251,7 +253,7 @@ copy_impl(
    param_view const& p) noexcept
 {
    encoding_opts opt;
-    opt.space_as_plus = false;
+    opt.space_as_plus = space_as_plus_;
    dest += encode(
        dest,
        end - dest,
--- a/src/detail/normalize.cpp
+++ b/src/detail/normalize.cpp
@@ -14,6 +14,7 @@
 #include "decode.hpp"
 #include <boost/url/segments_encoded_view.hpp>
 #include <boost/url/grammar/ci_string.hpp>
+#include <boost/url/grammar/lut_chars.hpp>
 #include <boost/assert.hpp>
 #include <boost/core/ignore_unused.hpp>
 #include <cstring>
@@ -74,6 +75,60 @@ compare_encoded(
    return 1;
 }

+int
+compare_encoded_query(
+    core::string_view lhs,
+    core::string_view rhs) noexcept
+{
+    static constexpr
+    grammar::lut_chars
+    query_compare_exception_lut = "&=+";
+
+    std::size_t n0 = 0;
+    std::size_t n1 = 0;
+    char c0 = 0;
+    char c1 = 0;
+    while(
+        !lhs.empty() &&
+        !rhs.empty())
+    {
+        bool const lhs_was_decoded = lhs.front() != '%';
+        bool const rhs_was_decoded = rhs.front() != '%';
+        pop_encoded_front(lhs, c0, n0);
+        pop_encoded_front(rhs, c1, n1);
+        if (c0 < c1)
+            return -1;
+        if (c1 < c0)
+            return 1;
+        // The decoded chars are the same, but
+        // are these query exceptions that have
+        // different meanings when decoded?
+        if (query_compare_exception_lut(c0))
+        {
+            // If so, we only continue if both
+            // chars were decoded or encoded
+            // the same way.
+            if (lhs_was_decoded == rhs_was_decoded)
+                continue;
+            // Otherwise, we return a value != 0
+            // because these chars are not equal.
+            // If rhs was the decoded one, it contains
+            // an ascii char higher than '%'
+            if (rhs_was_decoded)
+                return -1;
+            else
+                return 1;
+        }
+    }
+    n0 += detail::decode_bytes_unsafe(lhs);
+    n1 += detail::decode_bytes_unsafe(rhs);
+    if (n0 == n1)
+        return 0;
+    if (n0 < n1)
+        return -1;
+    return 1;
+}
+
 void
 digest_encoded(
    core::string_view s,
--- a/src/detail/normalize.hpp
+++ b/src/detail/normalize.hpp
@@ -80,6 +80,16 @@ compare_encoded(
    core::string_view lhs,
    core::string_view rhs) noexcept;

+// compare two core::string_views as if they are both
+// percent-decoded, but only consider the special
+// query chars ("&=+") equivalent when they are
+// both decoded or both encoded, because encoding
+// these chars changes their meaning
+int
+compare_encoded_query(
+    core::string_view lhs,
+    core::string_view rhs) noexcept;
+
 // digest a core::string_view as if it were
 // percent-decoded
 void
--- a/src/params_base.cpp
+++ b/src/params_base.cpp
@@ -180,7 +180,7 @@ params_base::
 end() const noexcept ->
    iterator
 {
-    return iterator(ref_, opt_, 0);
+    return {ref_, opt_, 0};
 }

 //------------------------------------------------
--- a/src/params_encoded_ref.cpp
+++ b/src/params_encoded_ref.cpp
@@ -239,7 +239,7 @@ erase(
    return u_->edit_params(
        first.it_,
        last.it_,
-        detail::query_iter(s));
+        detail::query_string_iter(s));
 }

 } // urls
--- a/src/params_encoded_view.cpp
+++ b/src/params_encoded_view.cpp
@@ -36,7 +36,7 @@ params_encoded_view::
 operator
 params_view() const noexcept
 {
-    return { ref_, encoding_opts{} };
+    return { ref_, encoding_opts{ true, false, false} };
 }

 } // urls
--- a/src/params_ref.cpp
+++ b/src/params_ref.cpp
@@ -66,12 +66,12 @@ insert(
    param_view const& p) ->
        iterator
 {
-    return iterator(
+    return {
        u_->edit_params(
            before.it_,
            before.it_,
-            detail::param_iter(p)),
-        opt_);
+            detail::single_param_iter(p, opt_.space_as_plus)),
+        opt_};
 }

 auto
@@ -130,7 +130,7 @@ replace(
        u_->edit_params(
            pos.it_,
            std::next(pos).it_,
-            detail::param_iter(p)),
+            detail::single_param_iter(p, opt_.space_as_plus)),
        opt_);
 }

@@ -232,7 +232,7 @@ erase(
        u_->edit_params(
            first.it_,
            last.it_,
-            detail::query_iter(s)),
+            detail::query_string_iter(s)),
        opt_);
 }

--- a/src/rfc/detail/charsets.hpp
+++ b/src/rfc/detail/charsets.hpp
@@ -18,6 +18,20 @@ namespace boost {
 namespace urls {
 namespace detail {

+struct empty_chars_t
+{
+    constexpr
+    bool
+    operator()(char) const noexcept
+    {
+        return false;
+    }
+};
+
+constexpr
+empty_chars_t
+empty_chars{};
+
 constexpr
 auto
 user_chars =
@@ -58,6 +72,11 @@ auto
 query_chars =
    pchars + '/' + '?' + '[' + ']';

+constexpr
+grammar::lut_chars
+query_ignore_chars =
+    "&=+";
+
 constexpr
 auto
 param_key_chars = pchars
--- a/src/url_base.cpp
+++ b/src/url_base.cpp
@@ -1328,7 +1328,7 @@ set_query(
    edit_params(
        detail::params_iter_impl(impl_),
        detail::params_iter_impl(impl_, 0),
-        detail::query_iter(s, true));
+        detail::query_string_iter(s, true));
    return *this;
 }

@@ -1401,7 +1401,7 @@ params_ref
 url_base::
 params(encoding_opts opt) noexcept
 {
-    return params_ref(*this, opt);
+    return {*this, opt};
 }

 params_encoded_ref
@@ -1635,12 +1635,15 @@ resolve(
 //
 //------------------------------------------------

-template <class Charset>
+template <
+    class AllowedCharset,
+    class IgnoredCharset>
 void
 url_base::
 normalize_octets_impl(
    int id,
-    Charset const& allowed,
+    AllowedCharset const& allowed,
+    IgnoredCharset const& ignored,
    op_t& op) noexcept
 {
    char* it = s_ + impl_.offset(id);
@@ -1660,7 +1663,8 @@ normalize_octets_impl(

        // decode unreserved octets
        d = detail::decode_one(it + 1);
-        if (allowed(d))
+        if (allowed(d) &&
+            !ignored(d))
        {
            *dest = d;
            it += 3;
@@ -1683,6 +1687,18 @@ normalize_octets_impl(
    }
 }

+template<class CharSet>
+void
+url_base::
+normalize_octets_impl(
+    int idx,
+    CharSet const& allowed,
+    op_t& op) noexcept
+{
+    return normalize_octets_impl(
+        idx, allowed, detail::empty_chars, op);
+}
+
 url_base&
 url_base::
 normalize_scheme()
@@ -1884,7 +1900,10 @@ normalize_query()
 {
    op_t op(*this);
    normalize_octets_impl(
-        id_query, detail::query_chars, op);
+        id_query,
+        detail::query_chars,
+        detail::query_ignore_chars,
+        op);
    return *this;
 }

@@ -2647,20 +2666,16 @@ edit_params(
    auto pos1 = pos0 + it1.pos;
    pos0 = pos0 + it0.pos;

-    // Iterator doesn't belong to this url
+    // Iterators belong to this url
    BOOST_ASSERT(it0.ref.alias_of(impl_));
-
-    // Iterator doesn't belong to this url
    BOOST_ASSERT(it1.ref.alias_of(impl_));

-    // Iterator is in the wrong order
+    // Iterators are in the right order
    BOOST_ASSERT(it0.index <= it1.index);

-    // Iterator is out of range
+    // Iterators are within range
    BOOST_ASSERT(it0.index <= impl_.nparam_);
    BOOST_ASSERT(pos0 <= impl_.offset(id_frag));
-
-    // Iterator is out of range
    BOOST_ASSERT(it1.index <= impl_.nparam_);
    BOOST_ASSERT(pos1 <= impl_.offset(id_frag));

--- a/src/url_view_base.cpp
+++ b/src/url_view_base.cpp
@@ -701,7 +701,7 @@ compare(const url_view_base& other) const noexcept

    if (has_query())
    {
-        comp = detail::compare_encoded(
+        comp = detail::compare_encoded_query(
            encoded_query(),
            other.encoded_query());
        if ( comp != 0 )
--- a/test/unit/encode.cpp
+++ b/test/unit/encode.cpp
@@ -26,6 +26,20 @@
 namespace boost {
 namespace urls {

+template <bool allow_plus, bool allow_space>
+struct space_as_plus_test_chars
+{
+    constexpr
+    bool
+    operator()(char c) const noexcept
+    {
+        return
+            (allow_plus && c == '+') ||
+            (allow_space && c == ' ') ||
+            unreserved_chars(c);
+    }
+};
+
 class encode_test
 {
 public:
@@ -133,8 +147,35 @@ public:
                " ", test_chars{}, opt, {}) == "+");
            BOOST_TEST(encode(
                "A", test_chars{}, opt, {}) == "A");
-            BOOST_TEST(encode(
-                " A+", test_chars{}, opt, {}) == "+A+");
+            BOOST_TEST_EQ(encode(
+                " A+", test_chars{}, opt, {}), "+A%2B");
+        }
+
+        // optimization of space-as-plus when the charset
+        // already includes or excludes plus or space
+        {
+            encoding_opts opt;
+            opt.space_as_plus = true;
+            BOOST_TEST_EQ(
+                encode(
+                    "a +",
+                    space_as_plus_test_chars<true, true>{},
+                    opt), "a+%2B");
+            BOOST_TEST_EQ(
+                encode(
+                    "a +",
+                    space_as_plus_test_chars<true, false>{},
+                    opt), "a+%2B");
+            BOOST_TEST_EQ(
+                encode(
+                    "a +",
+                    space_as_plus_test_chars<false, true>{},
+                    opt), "a+%2B");
+            BOOST_TEST_EQ(
+                encode(
+                    "a +",
+                    space_as_plus_test_chars<false, false>{},
+                    opt), "a+%2B");
        }
    }

--- a/test/unit/params_ref.cpp
+++ b/test/unit/params_ref.cpp
@@ -120,8 +120,9 @@ struct params_ref_test
        check(*r, init);
    }

-    // check that modification produces
-    // the string and correct sequence
+    // check whether modifying s0 via
+    // f produces a URL with the query
+    // string s1 and params init
    static
    void
    check(
@@ -153,6 +154,9 @@ struct params_ref_test
        }
    }

+    // check whether modifying s0 via
+    // f1 and f2 produces a URL with
+    // the query string s1 and params init
    static
    void
    check(
@@ -371,7 +375,7 @@ struct params_ref_test
            {
                assign(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
            };
-            check(f, g, "", "first&last=&full=John%20Doe",
+            check(f, g, "", "first&last=&full=John+Doe",
                { {"first",no_value}, {"last",""}, {"full","John Doe"} });
        }
        {
@@ -430,11 +434,11 @@ struct params_ref_test
            {
                append(qp, { {"first",nullptr}, {"last",""}, {"full", "John Doe"} });
            };
-            check(f, g, "", "first&last=&full=John%20Doe",
+            check(f, g, "", "first&last=&full=John+Doe",
                { {"first",no_value}, {"last",""}, {"full","John Doe"} });
-            check(f, g, "?", "&first&last=&full=John%20Doe",
+            check(f, g, "?", "&first&last=&full=John+Doe",
                { {"",no_value}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
-            check(f, g, "?key=value", "key=value&first&last=&full=John%20Doe",
+            check(f, g, "?key=value", "key=value&first&last=&full=John+Doe",
                { {"key","value"}, {"first",no_value}, {"last",""}, {"full","John Doe"} });
        }
        {
@@ -892,6 +896,142 @@ struct params_ref_test
        }
    }

+    static
+    void
+    testSpaceAsPlus()
+    {
+        // issue #903: consistent space-as-plus handling in query params
+        {
+            // "&=?" in key/values
+            {
+                // In the general case, normalized URLs
+                // always decode unreserved chars and encode
+                // reserved chars.
+                // However, normalizing the URL query should
+                // maintain the decoded and encoded "&=+"
+                // because they have different meanings
+                // in a query.
+                // This isn't optional either because
+                // normalization can only mitigate false
+                // negatives, but it should eliminate
+                // false positives.
+                // Making it optional would be allowing
+                // a false positive because there's
+                // at least one very relevant scheme (HTTP)
+                // where decoded/encoded "&=+" have different
+                // meanings and represent different resources.
+                urls::url u("https://a/a");
+                params_ref params = u.params();
+                params.append({"&=?", "&=?"});
+                auto it = params.begin();
+                const auto& param = *it;
+                BOOST_TEST_EQ(param.key, "&=?");
+                BOOST_TEST_EQ(param.value, "&=?");
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
+                u.normalize_query();
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
+                u.normalize();
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?%26%3D?=%26=?");
+            }
+
+            // opts.space_as_plus = true
+            {
+                // The params_ref object represents the decoded
+                // query parameters, so appending "+" represents
+                // a value that should be decoded as "+" (%2B)
+                // and not an encoded "+" that would be decoded
+                // as space.
+                urls::url u("https://a/a");
+                encoding_opts opts;
+                opts.space_as_plus = true;
+                params_ref params = u.params(opts);
+                params.append({"a+b c", "d+e f"});
+                auto it = params.begin();
+                const auto& param = *it;
+                BOOST_TEST_EQ(param.key, "a+b c");
+                BOOST_TEST_EQ(param.value, "d+e f");
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
+                u.normalize_query();
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a%2Bb+c=d%2Be+f");
+            }
+
+            // opts.space_as_plus = false
+            {
+                // The params_ref object represents the decoded
+                // query parameters without any special treatment
+                // for "+" and space. "+" can remain as is
+                // and space is represented as "%20".
+                urls::url u("https://a/a");
+                encoding_opts opts;
+                opts.space_as_plus = false;
+                params_ref params = u.params(opts);
+                params.append({"a+b c", "d+e f"});
+                auto it = params.begin();
+                const auto& param = *it;
+                BOOST_TEST_EQ(param.key, "a+b c");
+                BOOST_TEST_EQ(param.value, "d+e f");
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
+                u.normalize_query();
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b%20c=d+e%20f");
+            }
+
+            // comparisons
+            {
+                // We should not consider two URLs equivalent
+                // if the query differs in the way "&=+" is encoded.
+                // u1: no space as plus
+                url u1("https://a/a?%26%3D?=%26=?&a+b%20c=d+e%20f");
+                // u1e: no space as plus, non-separators encoded
+                url u1e("https://a/a?%26%3D?=%26=?&%61+%62%20%63=%64+%65%20%66");
+                // u2: space as plus
+                url u2("https://a/a?%26%3D?=%26=?&a%2Bb+c=d%2Be+f");
+                // u2e: space as plus, non-separators encoded
+                url u2e("https://a/a?%26%3D?=%26=?&%61%2B%62+%63=%64%2Be+%66");
+                // u3: separators decoded too early
+                url u3("https://a/a?&=?=&=?&a%2Bb+c=d%2Be+f");
+                BOOST_TEST_EQ(u1, u1e);
+                BOOST_TEST_NE(u1, u2);
+                BOOST_TEST_EQ(u2, u2e);
+                BOOST_TEST_NE(u2, u3);
+                BOOST_TEST_NE(u1, u3);
+
+                // queries that differ by size
+                url u4("https://a/a?a+b%20c=d+e%20f");
+                url u4longer("https://a/a?%61+%62%20%63=%64+%65%20%66g");
+                BOOST_TEST_NE(u4, u4longer);
+                BOOST_TEST_NE(u4longer, u4);
+            }
+
+            // append other types of any_param_range
+            {
+                url u("https://a/a");
+                params_ref params = u.params();
+                params.append({"a+b c", "d+e f"});
+                params.append({{"a+b c", "d+e f"}, {"a+b c", "d+e f"}});
+
+                // TODO: include all other forms of any_param_range
+
+                BOOST_TEST_EQ(params.size(), 3);
+                BOOST_TEST_EQ(u.buffer(),
+                    "https://a/a?a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f&a%2Bb+c=d%2Be+f");
+            }
+
+            // when setting the encoded query, %2B should not be re-encoded
+            {
+                url u("https://a/a");
+                u.set_encoded_query("a+b=a%2Bb");
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b=a%2Bb");
+            }
+
+            // NOTE(review): meant to cover the decoded query (no space-as-plus), but repeats set_encoded_query from above; confirm whether set_query was intended
+            {
+                url u("https://a/a");
+                u.set_encoded_query("a+b=a%2Bb");
+                BOOST_TEST_EQ(u.buffer(), "https://a/a?a+b=a%2Bb");
+            }
+        }
+    }
+
    static
    void
    testAll()
@@ -900,6 +1040,7 @@ struct params_ref_test
        testObservers();
        testModifiers();
        testJavadocs();
+        testSpaceAsPlus();
    }

    void