From 3dba527bb31d9fdfb0ac93d4794c2ebf9b9d61ae Mon Sep 17 00:00:00 2001 From: alandefreitas Date: Wed, 2 Feb 2022 18:24:40 -0300 Subject: [PATCH] document parsing functions close #121 --- CMakeLists.txt | 5 - doc/qbk/0.main.qbk | 22 +- doc/qbk/2.0.basic_usage.qbk | 99 --- doc/qbk/2.0.quicklook.qbk | 324 +++++---- doc/qbk/3.0.parsing.qbk | 110 ++- doc/qbk/3.1.scheme.qbk | 192 ++++- doc/qbk/3.2.authority.qbk | 418 ++++++++++- doc/qbk/3.3.path.qbk | 262 ++++++- doc/qbk/3.4.query.qbk | 259 ++++++- doc/qbk/3.5.fragment.qbk | 124 +++- doc/qbk/3.6.segment_views.qbk | 58 -- doc/qbk/3.7.params_views.qbk | 27 - doc/qbk/3.8.authority_view.qbk | 36 - doc/qbk/4.0.Modification.qbk | 112 --- doc/qbk/4.0.modifying.qbk | 124 ++++ doc/qbk/5.0.grammars.qbk | 4 +- doc/qbk/quickref.xml | 2 +- example/CMakeLists.txt | 22 - example/Jamfile | 12 - example/quicklook.cpp | 113 --- include/boost/url/const_string.hpp | 8 - include/boost/url/impl/segments_view.ipp | 13 +- include/boost/url/params.hpp | 8 +- test/unit/CMakeLists.txt | 1 + test/unit/Jamfile | 1 + test/unit/const_string.cpp | 5 +- test/unit/segments_view.cpp | 12 +- test/unit/snippets.cpp | 885 +++++++++++++++++++++++ 28 files changed, 2491 insertions(+), 767 deletions(-) delete mode 100644 doc/qbk/2.0.basic_usage.qbk delete mode 100644 doc/qbk/3.6.segment_views.qbk delete mode 100644 doc/qbk/3.7.params_views.qbk delete mode 100644 doc/qbk/3.8.authority_view.qbk delete mode 100644 doc/qbk/4.0.Modification.qbk create mode 100644 doc/qbk/4.0.modifying.qbk delete mode 100644 example/CMakeLists.txt delete mode 100644 example/Jamfile delete mode 100644 example/quicklook.cpp create mode 100644 test/unit/snippets.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b2f90927..834811c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,8 +128,3 @@ endif() if(BOOST_URL_BUILD_TESTS) add_subdirectory(test) endif() - -if(BOOST_URL_BUILD_EXAMPLES) - add_subdirectory(example) -endif() - diff --git a/doc/qbk/0.main.qbk 
b/doc/qbk/0.main.qbk index c05009fa..b2899390 100644 --- a/doc/qbk/0.main.qbk +++ b/doc/qbk/0.main.qbk @@ -45,10 +45,13 @@ [def __MoveConstructible__ [@https://en.cppreference.com/w/cpp/named_req/MoveConstructible ['MoveConstructible]]] [def __SemiRegular__ [@https://en.cppreference.com/w/cpp/concepts/semiregular ['SemiRegular]]] [def __Swappable__ [@https://en.cppreference.com/w/cpp/named_req/Swappable ['Swappable]]] -[def __CharSet__ [link url.charset ['CharSet]]] +[def __CharSet__ [link url.grammar.charset ['CharSet]]] [def __std_swap__ [@https://en.cppreference.com/w/cpp/algorithm/swap `std::swap`]] [def __authority_view__ [link url.ref.boost__urls__authority_view `authority_view`]] +[def __segments_view__ [link url.ref.boost__urls__segments_view `segments_view`]] +[def __segments_encoded_view__ [link url.ref.boost__urls__segments_encoded_view `segments_encoded_view`]] +[def __query_param_view__ [link url.ref.boost__urls__query_param_view `query_param_view`]] [def __error_code__ [link url.ref.boost__urls__error_code `error_code`]] [def __parse_uri__ [link url.ref.boost__urls__parse_uri `parse_uri`]] [def __result__ [link url.ref.boost__urls__result `result`]] @@ -67,9 +70,12 @@ [/-----------------------------------------------------------------------------] +[import ../../test/unit/snippets.cpp] + +[/-----------------------------------------------------------------------------] + [include 1.0.overview.qbk] [include 2.0.quicklook.qbk] -[include 2.0.basic_usage.qbk] [section Parsing] [include 3.0.parsing.qbk] @@ -78,14 +84,18 @@ [include 3.3.path.qbk] [include 3.4.query.qbk] [include 3.5.fragment.qbk] -[include 3.6.segment_views.qbk] -[include 3.7.params_views.qbk] -[include 3.8.authority_view.qbk] [endsect] -[include CharSet.qbk] +[include 4.0.modifying.qbk] + +[section Allocators] +[endsect] + [include 5.0.grammars.qbk] +[section Examples] +[endsect] + [section:ref Reference] [xinclude quickref.xml] [block''''''] diff --git a/doc/qbk/2.0.basic_usage.qbk 
b/doc/qbk/2.0.basic_usage.qbk deleted file mode 100644 index 3629e23a..00000000 --- a/doc/qbk/2.0.basic_usage.qbk +++ /dev/null @@ -1,99 +0,0 @@ -[/ - Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - - Official repository: https://github.com/CPPAlliance/url -] - -[section Basic Usage] -[block''''''] - -[/-----------------------------------------------------------------------------] - -[section Nomenclature] - -This library standardizes on the term "URL" to refer to all strings which -follow the syntax in __rfc3986__. The reason is that the terms URI and IRI -cause confusion. In practice a single algorithm is used for both so -keeping them distinct yields little value. When discussing particular -grammars, the name of the grammar is used exactly as it appears in the -literature. - -[endsect] - -[/-----------------------------------------------------------------------------] - -[section:bnf_notation BNF Notation] - -This documentation uses the Augmented Backus-Naur Form (ABNF) -notation of -[@https://tools.ietf.org/html/rfc2234 rfc2234] -to specify particular grammars used by algorithms and containers. While -a complete understanding of the notation is not a requirement for using the -library, it may help for understanding how valid components of URLs are -defined. In particular, this will be of interest to users who wish to -compose parsing algorithms using the combinators provided by the library. - -[endsect] - -[/-----------------------------------------------------------------------------] - -[section:result_return_type `result` Return Type] - -In many places, functions in the library have a return type which uses the -__result__ alias template. Below is a synopsis of the type. 
For complete -information please consult the full `result` documentation in Boost.System: -``` - // A variant-like object which hold the - // value T upon success, otherwise holds - // an ``__error_code__``. - // - template< class T > - class ``__result__`` - { - public: - // Return true if the result contains an error - constexpr bool has_error() const noexcept; - - // These return true if the result contains a value - constexpr bool has_value() const noexcept; - constexpr explicit operator bool() const noexcept; - - // Return the value or throw an exception if has_value()==false - constexpr T& value(); - constexpr T& operator*(); - constexpr T const& value() const; - constexpr T const& operator*() const; - - // Return the error, which is default constructed if has_error()==false - constexpr ``__error_code__`` error() const noexcept; - - ``['more...]`` -``` - -[endsect] - -[/-----------------------------------------------------------------------------] - -[section:percent_encoding Percent-Encoding] -[endsect] - -[/-----------------------------------------------------------------------------] - -[section:source_listings Source Listings] - -[note - All code in this documentation is written as if the - following declarations are in effect: -``` - #include - - using namespace boost::urls; -``` -] - -[endsect] - -[endsect] diff --git a/doc/qbk/2.0.quicklook.qbk b/doc/qbk/2.0.quicklook.qbk index 301ee23e..2b917743 100644 --- a/doc/qbk/2.0.quicklook.qbk +++ b/doc/qbk/2.0.quicklook.qbk @@ -8,18 +8,16 @@ Official repository: https://github.com/CPPAlliance/url ] -[section:quick_look Quick Look] +[section Quick Look] -[h5 Headers] +[heading Headers] This section is intended to give the reader a brief overview of the features and interface style of the library. We begin by including the library header file which brings all the symbols into scope. 
[c++] -``` - #include -``` +[snippet_headers_1] Alternatively, individual headers may be included to obtain the declarations for specific types. @@ -29,6 +27,7 @@ Alternatively you can use the header-only configuration simply by including this header file in ['exactly one] of your new or existing source files: +[c++] ``` #include ``` @@ -36,60 +35,55 @@ existing source files: [note Sample code and identifiers used throughout are written as if the following declarations are in effect: - ``` - #include - using namespace boost::urls; - ``` + + [snippet_headers_3] ] -[h5 Parsing] +[heading Parsing] Say you have the following URL that you want to parse: -[teletype] -``` - https://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#anchor -``` +[c++] +[snippet_parsing_1] You can parse it by calling this function: -[c++] -``` - __url_view__ r = __parse_uri__( "https://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor" ).value(); -``` +[snippet_parsing_2] The function __parse_uri__ returns an object of type `__result__<__url_view__>` which is a container resembling a variant that holds either an error or an object. -[note - In this example we immediately call `result::value` which throws an exception - on a parsing error. The functions `result::has_value` and `result::has_error` - could also be used to check if the string has been parsed without throwing - errors. -] +We can immediately call `result::value` to obtain a __url_view__. + +[snippet_parsing_3] + +`result::value` throws an exception on a parsing error. + +[snippet_parsing_4] + +Alternatively, the functions `result::has_value` and +`result::has_error` could also be used to check if +the string has been parsed without errors. + +[snippet_parsing_5] When there are no errors, it `result::value` returns an instance of __url_view__, which holds the parsed result. -It is worth noting that __parse_uri__ allocates no memory and throws no exceptions. 
-Like a __string_view__, a __url_view__ does not retain ownership of the underlying -string buffer. Constructed URL views always contain a valid URL in its correctly -serialized form. +[note + It is worth noting that __parse_uri__ allocates no memory and throws no exceptions. + Like a __string_view__, a __url_view__ does not retain ownership of the underlying + string buffer. Constructed URL views always contain a valid URL in its correctly + serialized form. +] -[h5 Accessing] +[heading Accessing] Accessing the parts of the URL is easy: [table [[Code][Output]] [[ [c++] -``` - std::cout << - "scheme : " << u.``[link url.ref.boost__urls__url_view.scheme `scheme`]``() << '\n' << - "authority : " << u.``[link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`]``() << '\n' << - "path : " << u.``[link url.ref.boost__urls__url_view.encoded_path `encoded_path`]``() << '\n' << - "query : " << u.``[link url.ref.boost__urls__url_view.encoded_query `encoded_query`]``() << '\n' << - "fragment : " << u.``[link url.ref.boost__urls__url_view.encoded_fragment `encoded_fragment`]``() << '\n'; -``` +[snippet_accessing_1] ][ [teletype] ``` @@ -106,17 +100,47 @@ Functions like return string views, simply referencing the relevant portion of the URL string without the need to allocate memory. -[h5 Decoding] - -To get a decoded string, call the same function without the word `encoded`: +These functions might return empty strings both for empty and absent +components. 
[table [[Code][Output]] [[ [c++] +[snippet_accessing_2] +][ +[teletype] ``` - std::cout << - "query : " << u.``[link url.ref.boost__urls__url_view.query `query`]``() << '\n' << - "fragment : " << u.``[link url.ref.boost__urls__url_view.fragment `fragment`]``() << '\n'; + fragment 1 : + + fragment 2 : ``` +]]] + +To differentiate between empty and absent components, we can use +functions such as [link url.ref.boost__urls__url_view.has_fragment `has_fragment`]: + +[table [[Code][Output]] [[ +[c++] +[snippet_accessing_2] +][ +[teletype] +``` + has fragment 1 : 0 + fragment 1 : + + has fragment 2 : 1 + fragment 2 : + +``` +]]] + +[heading Decoding] + +To get a decoded string, most observers provide the corresponding +function without the word `encoded`: + +[table [[Code][Output]] [[ +[c++] +[snippet_decoding_1] ][ [teletype] ``` @@ -126,20 +150,105 @@ To get a decoded string, call the same function without the word `encoded`: ]]] The decoded functions return a __const_string__, which is a library type -that models a read-only string. __const_string__ retains ownership of the -underlying buffer and uses a type-erased allocator. An extra built-in -buffer is also provided to avoid allocations for any string smaller than -this [link url.ref.boost__urls__const_string.builtin_capacity `builtin_capacity`]. +that models a read-only string. This class retains ownership of the +underlying buffer and uses a type-erased allocator. -[note - __url_view__ does not provide decoded functions for compound elements, such as - [link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`] and - [link url.ref.boost__urls__url_view.encoded_path `encoded_path`]. Compound elements - are made up of multiple sub-elements whose encoded delimiters might be ambiguous. 
+The following table lists the decoded and encoded observers for each component: + +[table [[Component][Decoded][Encoded][Check]] +[ + [authority] + [] + [[link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`]] + [[link url.ref.boost__urls__url_view.has_authority `has_authority`]]] + +[ + [fragment] + [[link url.ref.boost__urls__url_view.fragment `fragment`]] + [[link url.ref.boost__urls__url_view.encoded_fragment `encoded_fragment`]] + [[link url.ref.boost__urls__url_view.has_fragment `has_fragment`]] +] +[ + [host] + [[link url.ref.boost__urls__url_view.host `host`]] + [[link url.ref.boost__urls__url_view.encoded_host `encoded_host`]] + [] +] +[ + [host_and_port] + [] + [[link url.ref.boost__urls__url_view.encoded_host_and_port `encoded_host_and_port`]] + [] +] +[ + [origin] + [] + [[link url.ref.boost__urls__url_view.encoded_origin `encoded_origin`]] + [] +] +[ + [params] + [[link url.ref.boost__urls__url_view.params `params`]] + [[link url.ref.boost__urls__url_view.encoded_params `encoded_params`]] + [] +] +[ + [password] + [[link url.ref.boost__urls__url_view.password `password`]] + [[link url.ref.boost__urls__url_view.encoded_password `encoded_password`]] + [[link url.ref.boost__urls__url_view.has_password `has_password`]] +] +[ + [path] + [] + [[link url.ref.boost__urls__url_view.encoded_path `encoded_path`]] + [] +] +[ + [query] + [[link url.ref.boost__urls__url_view.query `query`]] + [[link url.ref.boost__urls__url_view.encoded_query `encoded_query`]] + [[link url.ref.boost__urls__url_view.has_query `has_query`]] +] +[ + [segments] + [[link url.ref.boost__urls__url_view.segments `segments`]] + [[link url.ref.boost__urls__url_view.encoded_segments `encoded_segments`]] + [] +] +[ + [user] + [[link url.ref.boost__urls__url_view.user `user`]] + [[link url.ref.boost__urls__url_view.encoded_user `encoded_user`]] + [] +] +[ + [userinfo] + [[link url.ref.boost__urls__url_view.userinfo `userinfo`]] + [[link url.ref.boost__urls__url_view.encoded_userinfo `encoded_userinfo`]] + 
[[link url.ref.boost__urls__url_view.has_userinfo `has_userinfo`]] +] ] +Note that __url_view__ does not provide decoded functions for +[link compound-elements compound elements], such as +[link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`] +and [link url.ref.boost__urls__url_view.encoded_path `encoded_path`]. +Compound elements are made up of multiple sub-elements whose encoded delimiters might be ambiguous. -[h5 Allocators] +Many components also do not have corresponding functions such as +[link url.ref.boost__urls__url_view.has_authority `has_authority`] +to check for their existence. This happens because some URL +components are mandatory. + +Although URL query strings are often used to represent key/value pairs, they +are not a compound element because this interpretation is not defined by +__rfc3986__. Users can treat the query as a single entity. +__url_view__ provides the function +[link url.ref.boost__urls__url_view.params `params`] to extract this view +of key/value pairs. + +[heading Allocators] All functions that return __const_string__ accept an optional __Allocator__. When omitted, this parameter defaults to the standard allocator. @@ -147,20 +256,22 @@ When omitted, this parameter defaults to the standard allocator. Here is the same code which prints the query with percent-decoding applied, except using a local stack-based allocator instead of the heap: +[table [[Code][Output]] [[ [c++] +[snippet_allocators_1] +][ +[teletype] ``` - ``__static_pool__``< 1024 > sp; - - std::cout << "query : " << u.query( sp.allocator() ) << '\n'; + query : id=42&name=John Doe + fragment : page anchor ``` +]]] The __static_pool__ type is provided by the library as an easy alternative to apply percent-decoding to long strings with no memory allocations. -Note that short strings can already fit in the -[link url.ref.boost__urls__const_string.builtin_capacity `builtin_capacity`] -of a __const_string__ with no memory allocations. 
-[h5 Compound elements] +[#compound-elements] +[heading Compound elements] The path and query parts of the URL are treated specially by the library. While they can be accessed as individual encoded strings, they can also be @@ -170,14 +281,8 @@ This code calls [link url.ref.boost__urls__url_view.encoded_segments `encoded_segments`] to obtain the path segments as a container that returns encoded strings: [table [[Code][Output]] [[ -``` - ``[link url.ref.boost__urls__segments_encoded_view `segments_encoded_view`]`` segs = u.``[link url.ref.boost__urls__url_view.encoded_segments `encoded_segments`]``(); - - for( auto v : segs ) - { - std::cout << v << '\n'; - } -``` +[c++] +[snippet_compound_elements_1] ][ ``` path @@ -190,7 +295,7 @@ As with other __url_view__ functions which return encoded strings, the encoded segments container does not allocate memory. Instead it returns views to the corresponding portions of the underlying encoded buffer referenced by the URL. -[h5 Encoded compound elements] +[heading Encoded compound elements] Alternatively, the function [link url.ref.boost__urls__url_view.segments `segments`] may be called to obtain the path segments as strings with percent-decoding @@ -200,14 +305,7 @@ With no arguments the container returns strings using the default allocator: [table [[Code][Output]] [[ [c++] -``` - ``[link url.ref.boost__urls__segments_view `segments_view`]`` segs = u.``[link url.ref.boost__urls__url_view.segments `segments`]``(); - - for( auto v : segs ) - { - std::cout << v << '\n'; - } -``` +[snippet_encoded_compound_elements_1] ][ [teletype] ``` @@ -222,16 +320,7 @@ when acquiring the container: [table [[Code][Output]] [[ [c++] -``` - __static_pool__< 1024 > pool; - - ``[link url.ref.boost__urls__segments_view `segments_view`]`` segs = u.``[link url.ref.boost__urls__url_view.segments `segments`]``( pool.allocator() ); - - for( auto v : segs ) - { - std::cout << v << '\n'; - } -``` +[snippet_encoded_compound_elements_2] ][ [teletype] ``` 
@@ -247,16 +336,7 @@ as percent-encoded key and value pair strings: [table [[Code][Output]] [[ [c++] -``` - ``[link url.ref.boost__urls__params_encoded_view `params_encoded_view`]`` params = u.``[link url.ref.boost__urls__url_view.encoded_params `encoded_params`]``(); - - for( auto v : params ) - { - std::cout << - "key = " << v.key << - ", value = " << v.value << "\n"; - } -``` +[snippet_encoded_compound_elements_3] ][ [teletype] ``` @@ -270,18 +350,7 @@ obtain the query parameters as allocated strings with percent-decoding applied, using a stack-based allocator which does not use the heap: [table [[Code][Output]] [[ [c++] -``` - __static_pool__< 1024 > pool; - - ``[link url.ref.boost__urls__params_view `params_view`]`` params = u.``[link url.ref.boost__urls__url_view.params `params`]``( pool.allocator() ); - - for( auto v : params ) - { - std::cout << - "key = " << v.key << - ", value = " << v.value << "\n"; - } -``` +[snippet_encoded_compound_elements_4] ][ [teletype] ``` @@ -292,7 +361,7 @@ using a stack-based allocator which does not use the heap: [/-----------------------------------------------------------------------------] -[h5 Modification] +[heading Modification] The library provides the container __url__ for supporting modification of the contents of a URL. Unlike the __url_view__, which does not gain ownership of @@ -302,9 +371,7 @@ allocator to control a resizable character buffer which it owns. A __url__ must be constructed from an existing __url_view__. For example using a parse function: [c++] -``` - ``__url__`` u = ``__parse_uri__``( "http://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor" ).value(); -``` +[snippet_modification_1] Objects of type __url__ are first-class. They can be copied, moved, and assigned as needed. They support all of the inspection functions of @@ -313,28 +380,15 @@ __url_view__, and also provide functions to modify all parts of the URL. 
Changing the scheme is easy: -``` - u.set_scheme( "https" ); -``` +[snippet_modification_2] Or we can use a predefined constant: -``` - u.set_scheme( scheme::https ); // equivalent to u.set_scheme( "https" ); -``` +[snippet_modification_3] The scheme must be valid, however, or an exception is thrown: -``` - try - { - u.set_scheme( "100" ); // illegal, must start with a letter - } - catch( std::invalid_argument const& ) - { - // this happens - } -``` +[snippet_modification_4] All modification functions perform validation on their input. Attemping to set part of the URL to an invalid string will result in an exception. @@ -343,11 +397,7 @@ It is not possible for a __url__ to hold a syntactically illegal text. Modification functions return a reference to the object, so chaining is possible: -``` - v.set_host( parse_ipv4_address( "192.168.0.1" ).value() ) - .set_port( 8080 ) - .remove_userinfo(); -``` +[snippet_modification_5] All non-const operations offer the strong exception safety guarantee. @@ -356,11 +406,7 @@ modifiable range functionality, using member functions of the container: [table [[Code][Output]] [[ [c++] -``` - params p = u.params(); - p.emplace_at(p.find("name"), "name", "Vinnie Falco"); - std::cout << u << '\n'; -``` +[snippet_modification_6] ][ [teletype] ``` diff --git a/doc/qbk/3.0.parsing.qbk b/doc/qbk/3.0.parsing.qbk index 20497185..84f06f59 100644 --- a/doc/qbk/3.0.parsing.qbk +++ b/doc/qbk/3.0.parsing.qbk @@ -7,6 +7,15 @@ Official repository: https://github.com/CPPAlliance/url ] +[#section.url_view] +[section URL] + +[heading Notation] + +Following the syntax in __rfc3986__, a single algorithm is used for URLs, URIs +and IRIs. When discussing particular grammars, their rules are presented +exactly as they appear in the literature. + A URL string can be parsed using one of the parsing functions. 
Each function parses according to a particular grammar specified in __rfc3986__: @@ -29,9 +38,8 @@ in __rfc3986__: ]] The collective grammars parsed by these algorithms are specified below. -To understand the syntax of the BNF specification, or to understand the -rest of the elements such as "scheme" below please refer to __rfc3986__: +[teletype] ``` absolute-URI = scheme ":" hier-part [ "?" query ] @@ -52,23 +60,40 @@ rest of the elements such as "scheme" below please refer to __rfc3986__: / path-empty ``` -Each of these functions accepts a __string_view__ and returns a __url_view__ -wrapped in a __result__ type. The following example parses a string literal +[heading Example] + +The following is an example URI and its main parts: + +[teletype] +``` + foo://example.com:8042/over/there?name=ferret#nose + \_/ \______________/\_________/ \_________/ \__/ + | | | | | +scheme authority path query fragment +``` + + +For the complete specification please refer to __rfc3986__: + +[note + This documentation refers to the Augmented Backus-Naur Form (ABNF) + notation of + [@https://tools.ietf.org/html/rfc2234 rfc2234] + to specify particular grammars used by algorithms and containers. While + a complete understanding of the notation is not a requirement for using the + library, it may help for understanding how valid components of URLs are + defined. In particular, this will be of interest to users who wish to + compose parsing algorithms using the combinators provided by the library. +] + +[heading Functions] + +All parsing functions accept a __string_view__ and return a +`__result__<__url_view__>`. 
The following example parses a string literal containing a URI: -``` -result< url_view > r = parse_uri( "https://www.example.com/path/to/file.txt" ); -if( r.has_value() ) // parsing was successful -{ - url_view u = r.value(); // extract the url_view - - std::cout << u; // format the URL to cout -} -else -{ - std::cout << r.error().message(); // parsing failure; print error -} -``` +[c++] +[snippet_parsing_url_1] The function throws nothing and returns the result in a variant-like container which holds a __url_view__ or an __error_code__ in the case where the parsing @@ -76,10 +101,8 @@ failed. Note that like a string view, the URL view does not own the underlying character buffer. Instead, it references the string passed to the parsing function. The caller is required to ensure that the lifetime of the string extends until the view is destroyed. -A URL view containing a non-empty string cannot be constructed directly; -instead, it must be created using a parsing function. This guarantees that -any constructed view contains a syntactically valid URL already in its -serialized form. + +[heading Copying] The function [link url.ref.boost__urls__url_view.collect `url_view::collect`] @@ -87,24 +110,39 @@ may be used to create a copy of the underlying character buffer and attach ownership of the buffer to a newly returned view, which is wrapped in a shared pointer. 
The following code calls `collect` to create a read-only copy: -``` - // This will hold our copy - std::shared_ptr sp; - { - // result::value() will throw an exception if an error occurs - url_view u = parse_relative_ref( "/path/to/file.txt" ).value(); - // create a copy with ownership and string lifetime extension - sp = u.collect(); - - // At this point the string literal goes out of scope - } - - // but `*sp` remains valid since it has its own copy - std::cout << *sp; -``` +[c++] +[snippet_parsing_url_2] The interface of __url_view__ decomposes the URL into its individual parts and allows for inspection of the various parts as well as returning metadata about the URL itself. These non-modifying observer operations are described in the sections that follow. + +To create a mutable copy of the __url_view__, one can just create a __url__: + +[c++] +[snippet_parsing_url_3] + +[heading Return Type] + +In many places, functions in the library have a return type which uses the +__result__ alias template. This class allows the parsing algorithms to +report errors without referring to exceptions. + +The functions `result::has_value` and `result::has_error` can be used to +check if the result contains an error. + +[snippet_parsing_url_1] + +This ensures `result::value` will not throw an error. In contexts where +it is acceptable to throw errors, `result::value` can be used directly: + +[snippet_parsing_4] + +Check the reference for __result__ for a synopsis of the type. For complete +information please consult the full +[@boost:/libs/system/doc/html/system.html#ref_resultt_e `result`] +documentation in [@boost:/libs/system/doc/html/system.html Boost.System]. 
+ +[endsect] diff --git a/doc/qbk/3.1.scheme.qbk b/doc/qbk/3.1.scheme.qbk index 038ea5f2..6aadb057 100644 --- a/doc/qbk/3.1.scheme.qbk +++ b/doc/qbk/3.1.scheme.qbk @@ -11,20 +11,28 @@ [section Scheme] +[heading Notation] + The scheme is the top-level hierarchical element which defines the syntax and semantics of the rest of the URL. The scheme identifier -is always followed by a colon when it appears in a URL. Here are -some examples of URLs which contain a scheme: - ``` - https://www.example.com/path/to/file.txt?page=2 +is always followed by a colon when it appears in a URL. +Here are some examples of URLs with the schemes `https` and `file`: + +[teletype] +``` + https://www.example.com/path/to/file.txt?page=2 +``` + +``` file:///usr/local/bin/ - ``` +``` A scheme must start with a letter, and may contain only letters, digits, plus and minus signs, and periods: [table Scheme BNF [[ +[teletype] ``` scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) @@ -34,8 +42,71 @@ digits, plus and minus signs, and periods: ``` ]]] +[heading Member Functions] + +The functions for inspecting the scheme in a __url_view__ are as follows: + +[table Scheme Observers [ + [Function] + [Description] +][ + [[link url.ref.boost__urls__url_view.has_scheme `has_scheme`]] + [Return true if a scheme is present] +][ + [[link url.ref.boost__urls__url_view.scheme `scheme`]] + [Return the scheme as a string] +][ + [[link url.ref.boost__urls__url_view.scheme_id `scheme_id`]] + [Return the scheme as a known-scheme enumeration constant] +]] + +[note + None of these functions throw exceptions. If the URL has no scheme, + [link url.ref.boost__urls__url_view.scheme `scheme`] returns an empty + string. If the function + [link url.ref.boost__urls__url_view.scheme_id `scheme_id`] identifies a valid + but unknown scheme, the value [link url.ref.boost__urls__scheme `scheme::unknown`] + is returned. 
+] + +[heading Observers] + +The function [link url.ref.boost__urls__url_view.scheme `scheme`] can +be used to obtain the scheme from a __url_view__: + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_scheme_1] +][ +[teletype] +``` + mailto +``` +]]] + +If the URL has no scheme, this function returns an empty string. To check whether +a URL contains a scheme the function +[link url.ref.boost__urls__url_view.has_scheme `url_view::has_scheme`] might be used. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_scheme_2] +][ +[teletype] +``` + mailto +``` +]]] + +[heading Common schemes] + The library defines an enumeration of values for some well-known scheme -identifiers. These may be used instead of their corresponding strings: +identifiers. + +[c++] +[snippet_parsing_scheme_3] + +These may be used instead of their corresponding strings: [table Scheme IDs [ [ID] @@ -66,22 +137,111 @@ identifiers. These may be used instead of their corresponding strings: [Secure WebSocket Protocol] ]] -The functions for inspecting the scheme in a __url_view__ are as follows: -[table Scheme Observers [ - [Function] - [Description] +[heading Use Cases] + +A number of schemes are used to define the semantics of URLs. For instance, +the term web address is often used informally to describe URLs with the +`http` scheme, whose semantics are defined by +[@https://datatracker.ietf.org/doc/html/rfc2616#section-3.2.2 rfc2616]. +One of the conventions of the HTTP scheme is that port 80 is +implicitly assumed when no port is provided. Such conventions are not +part of the URL protocol. + +Schemes are also different from protocols. Although the scheme `http` is +used to interact with resources via the HTTP protocol, the scheme `file` +has no corresponding protocol. 
+ +Some noteworthy IANA-registered schemes are: + +[table Scheme IDs [ + [Scheme] + [Resource] ][ - [[link url.ref.boost__urls__url_view.has_scheme `has_scheme`]] - [Return true if a scheme is present] + [[@https://tools.ietf.org/html/rfc2392 `cid`]] + [SMTP/MIME messages] ][ - [[link url.ref.boost__urls__url_view.scheme `scheme`]] - [Return the scheme as a string] + [[@https://tools.ietf.org/html/rfc2397 `data`]] + [Inline data] ][ - [[link url.ref.boost__urls__url_view.scheme_id `scheme_id`]] - [Return the scheme as a known-scheme enumeration constant] + [[@https://tools.ietf.org/html/rfc4918 `dav`]] + [WebDAV] +][ + [[@https://tools.ietf.org/html/rfc4501 `dns`]] + [Domain Name System] +][ + [[@https://tools.ietf.org/html/rfc8089 `file`]] + [File systems] +][ + [[@https://tools.ietf.org/html/rfc1738 `ftp`]] + [FTP resources] +][ + [[@https://www.iana.org/assignments/uri-schemes/prov/git `git`]] + [GIT repository] +][ + [[@https://tools.ietf.org/html/rfc7230 `http`]] + [HTTP resources] +][ + [[@https://tools.ietf.org/html/rfc7230 `https`]] + [HTTP secured using SSL/TLS] +][ + [[@https://tools.ietf.org/html/rfc6068 `mailto`]] + [Email addresses] +][ + [[@http://magnet-uri.sourceforge.net/ `magnet`]] + [Identify files by content] +][ + [[@https://tools.ietf.org/html/rfc2224 `nfs`]] + [Network File System] +][ + [[@https://tools.ietf.org/html/rfc2384 `pop`]] + [POP3] +][ + [[@https://docs.aws.amazon.com/cli/latest/reference/s3/ `s3`]] + [Amazon S3] +][ + [[@https://tools.ietf.org/html/rfc5724 `sms`]] + [SMS messages] +][ + [[@https://www.iana.org/assignments/uri-schemes/prov/svn `svn`]] + [Subversion (SVN) repository] +][ + [[@https://tools.ietf.org/html/rfc2806 `tel`]] + [Telephone number] +][ + [[@https://www.iana.org/assignments/uri-schemes/prov/udp `udp`]] + [Streaming protocols over UDP] +][ + [[@https://tools.ietf.org/html/rfc2141 `urn`]] + [Uniform Resource Names] +][ + [[@https://tools.ietf.org/html/rfc6455 `ws`]] + [WebSocket Protocol] +][ + 
[[@https://tools.ietf.org/html/rfc6455 `wss`]] + [Secure WebSocket Protocol] ]] +Many other valid but unofficial schemes are common: + +[table Scheme IDs [ + [Scheme] + [Resource] +][ + [[@http://tools.ietf.org/html/draft-paskin-doi-uri `doi`]] + [Digital Object Identifier] +][ + [[@https://datatracker.ietf.org/doc/html/draft-hoehrmann-javascript-scheme `javascript`]] + [Javascript Code] +][ + [[@https://datatracker.ietf.org/doc/html/draft-patrick-lambert-odbc-uri-scheme `odbc`]] + [Open Database Connectivity] +][ + [[@https://api.slack.com/reference/deep-linking `slack`]] + [Slack Client] +]] + + [/-----------------------------------------------------------------------------] [endsect] diff --git a/doc/qbk/3.2.authority.qbk b/doc/qbk/3.2.authority.qbk index 14037769..0cdf92c2 100644 --- a/doc/qbk/3.2.authority.qbk +++ b/doc/qbk/3.2.authority.qbk @@ -11,14 +11,26 @@ [section Authority] +[heading Notation] + The authority is a hierarchical element which names an entity governing -the name space defined by the remainder of the URL. The host component -of the authority can be a registered name or server addressed with -optional port and user information. In a URL, the authority component -is always preceded by a double slash ("//"). +the name space defined by the remainder of the URL. It divides into +three subcomponents: + +[teletype] +``` + authority = [userinfo "@"] host [":" port] +``` + +* The `host` subcomponent of the authority can be a registered name or an IP address. +* The optional `port` number subcomponent is preceded by a colon ":" +* The optional `userinfo` subcomponent consists of a username and an optional password + +In a URL, the authority component is always preceded by a double slash ("//"). [table Authority BNF [ [ + [teletype] ``` authority = [ userinfo "@" ] host [ ":" port ] @@ -38,8 +50,7 @@ is always preceded by a double slash ("//"). 
] ]] -Note that if an authority is present, the host is always defined even if it -is the empty string (corresponding to a zero-length ['reg-name] in the BNF). +[heading Member Functions] The functions for inspecting all or part of the authority in a __url_view__ are as follows: @@ -117,6 +128,399 @@ __url_view__ are as follows: [Return the authority as a percent-encoded string.] ]] -[/-----------------------------------------------------------------------------] +[heading Observers] + +The function [link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`] can +be used to obtain the authority from a __url_view__: + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_3] +][ +[teletype] +``` + https://www.boost.org/users/download/ + scheme: https + has authority: 1 + authority: www.boost.org + path: /users/download/ +``` +]]] + +These functions do not throw. If the URL has no authority, +[link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`] returns an empty string. + +The function [link url.ref.boost__urls__url_view.has_authority `has_authority`] can be used to check +whether this empty string means there is no authority or an empty authority in the URL. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_1] +][ +[teletype] +``` + https:///path/to_resource + scheme: https + has authority: 1 + authority: + path: /path/to_resource +``` +]] +[[ +[c++] +[snippet_parsing_authority_6] +][ +[teletype] +``` + mailto://John.Doe@example.com + scheme: mailto + has authority: 1 + authority: John.Doe@example.com + path: +``` +]]] + +Notice that there is no decoded counterpart of +[link url.ref.boost__urls__url_view.encoded_authority `encoded_authority`]. +The reason is any decoded character `/` could +make it ambiguous with the path component. + +[heading Host] + +The host subcomponent represents where resources +are located. 
The functions +[link url.ref.boost__urls__url_view.encoded_host `encoded_host`] +and [link url.ref.boost__urls__url_view.host `host`] +can be used to obtain the host from a __url_view__, while +[link url.ref.boost__urls__url_view.encoded_host_and_port `encoded_host_and_port`] +allows us to directly obtain the host with the corresponding port number. + +The host might be a registered name + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_8] +][ +[teletype] +``` + https://john.doe@www.example.com:123/forum/questions/ + encoded host: www.example.com + host: www.example.com + host and port: www.example.com:123 + port: 123 + port number: 123 +``` +]]] + +or an IP address + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_9] +][ +[teletype] +``` + https://john.doe@192.168.2.1:123/forum/questions/ + encoded host: 192.168.2.1 + host: 192.168.2.1 + host and port: 192.168.2.1:123 + port: 123 + port number: 123 +``` +]]] + +Although this is not mandatory, note that the encoded host is rarely +different from its decoded counterpart. +The function [link url.ref.boost__urls__url_view.port_number `port_number`] +returns the decoded port as an integer. + +Registered names usually need to be handled differently from IP addresses. +The function +[link url.ref.boost__urls__url_view.host_type `host_type`] +can be used to identify which type of host is described in the URL. + +[c++] +[snippet_parsing_authority_10] + +When the [link url.ref.boost__urls__url_view.host_type `host_type`] +matches an IP address, the functions +[link url.ref.boost__urls__url_view.ipv4_address `ipv4_address`], +[link url.ref.boost__urls__url_view.ipv6_address `ipv6_address`] +can be used to obtain the decoded addresses as integers. + +[heading Userinfo] + +The optional `userinfo` subcomponent consists of a user name and +an optional password. 
The function +[link url.ref.boost__urls__url_view.encoded_userinfo `encoded_userinfo`] +can be used to retrieve the userinfo from a __url_view__. Analogous +functions are provided for the userinfo subcomponents. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_11] +][ +[teletype] +``` + https://john.doe:123456@www.somehost.com/forum/questions/ + + has_userinfo: 1 + encoded_userinfo: john.doe:123456 + userinfo: john.doe:123456 + + encoded_user: john.doe + user: john.doe + + has_password: 1 + encoded_password: 123456 + password: 123456 +``` +]]] + +Analogous to other observers, the functions +[link url.ref.boost__urls__url_view.has_userinfo `has_userinfo`] and +[link url.ref.boost__urls__url_view.has_password `has_password`] are provided +to differentiate empty components from absent components. + +Note that there is no function `has_user`. The user component is available +whenever `userinfo` exists. + +[note Although the specification allows the format `username:password`, +the password component should be used with care. + +It is not recommended to transfer password data through URLs +unless this is an empty string indicating no password.] + +[heading Authority View] + +In contexts where an authority can appear by itself, the library provides the +__authority_view__, a read-only container to a non-owning character buffer +containing a valid authority. + +As an example, the grammar for the +[@https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1 ['request-target]] +of an HTTP/1 CONNECT request uses +[@https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 ['authority-form]]. +This is what such a request looks like: + +[teletype] +``` + CONNECT www.example.com:80 HTTP/1.1 +``` + +In that case, we have an authority that cannot be parsed directly +with __parse_uri__ as a URL. Instead, we can use the analogous function +[link url.ref.boost__urls__parse_authority `parse_authority`] to +obtain an __authority_view__. 
+ +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_12] +][ +[teletype] +``` + www.example.com:80 + + encoded_host_and_port: www.example.com:80 + encoded_host: www.example.com + host: www.example.com + port: 80 + port number: 80 + + has_userinfo: 0 + encoded_userinfo: + userinfo: + + encoded_user: + user: + + has_password: 0 + encoded_password: + password: +``` +]]] + +The authority view provides the subset of observer member functions found in +__url_view__ which are relevant to the authority. However, when an authority +is parsed on its own, the leading double slashes ("//") are not present. + +The following authority string is also valid for +[link url.ref.boost__urls__parse_authority `parse_authority`]: + +[teletype] +``` + user:pass@www.example.com:443 +``` + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_authority_13] +][ +[teletype] +``` + user:pass@www.example.com:443 + + encoded_host_and_port: www.example.com:443 + encoded_host: www.example.com + host: www.example.com + port: 443 + port number: 443 + + has_userinfo: 1 + encoded_userinfo: user:pass + userinfo: user:pass + + encoded_user: user + user: user + + has_password: 1 + encoded_password: pass + password: pass +``` +]]] + + +[heading Use Cases] + +Note that if an authority is present, the host is always defined even if it +is the empty string (corresponding to a zero-length ['reg-name] in the BNF). + +[teletype] +``` + https:///path/to_resource + \____/\/\_______________/ + | | | +scheme authority path +``` + +[table + [[Component] [Value] ] + [[URL] [`https:///path/to_resource`] ] + [[Scheme] [`https`] ] + [[Has authority] [Yes] ] + [[Authority] [] ] + [[Path] [`/path/to_resource`] ] +] + +The authority component also influences how we should interpret the URL path. +If the authority is present, the path component must either be empty or begin +with a slash. 
+ +This is a common pattern where the path is empty: + +[teletype] +``` + https://www.boost.org + \___/ \___________/ + scheme authority (path is empty) +``` + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org`] ] + [[Scheme] [`https`] ] + [[Has authority] [Yes] ] + [[Authority] [`www.boost.org`] ] + [[Path] [] ] +] + +When both the authority and path exist, the path needs to begin with a slash: + +[teletype] +``` + https://www.boost.org/users/download/ + \___/ \___________/\______________/ + scheme authority path (begins with a slash) +``` + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/users/download/`] ] + [[Scheme] [`https`] ] + [[Has authority] [Yes] ] + [[Authority] [`www.boost.org`] ] + [[Path] [`/users/download/`] ] +] + +This rule also affects the path "`/`": + +[teletype] +``` + https://www.boost.org/ + \___/ \___________/\/ + scheme authority path (begins with a slash) +``` + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/`] ] + [[Scheme] [`https`] ] + [[Has authority] [Yes] ] + [[Authority] [`www.boost.org`] ] + [[Path] [`/`] ] +] + +When there is no authority component, the path cannot begin with an empty +segment. This means the path cannot begin with two slashes `//` to avoid these +characters being interpreted as the beginning of the authority component. 
+ +For instance, consider the following valid URL: + +[teletype] +``` + mailto:John.Doe@example.com + \____/ \__________________/ + scheme path +``` + +[table + [[Component] [Value] ] + [[URL] [`mailto:John.Doe@example.com`] ] + [[Scheme] [`mailto`] ] + [[Has authority] [No] ] + [[Authority] [] ] + [[Path] [`John.Doe@example.com`] ] +] + +Note how including a double slash would make the path be interpreted as the authority: + +[teletype] +``` + mailto://John.Doe@example.com + \____/ \____________________/ + scheme authority +``` + +[table + [[Component] [Value] ] + [[URL] [`mailto://John.Doe@example.com`] ] + [[Scheme] [`mailto`] ] + [[Has authority] [Yes] ] + [[Authority] [`John.Doe@example.com`] ] + [[Path] [] ] +] + +In complete authority components, we can also extract the `userinfo` and `port` subcomponents. + +[teletype] +``` + userinfo host port + /------\ /-------------\ /-\ + https://john.doe@www.example.com:123/forum/questions/ + \___/ \__________________________/\_______________/ + scheme authority path +``` + +[table + [[Component] [Value] ] + [[URL] [`https://john.doe@www.example.com:123/forum/questions/`] ] + [[Scheme] [`https`] ] + [[Has authority] [Yes] ] + [[Authority] [`john.doe@www.example.com:123`] ] + [[Host] [`www.example.com`] ] + [[Userinfo] [`john.doe`] ] + [[Port] [`123`] ] + [[Path] [`/forum/questions/`] ] +] [endsect] diff --git a/doc/qbk/3.3.path.qbk b/doc/qbk/3.3.path.qbk index a10247d2..d42c656c 100644 --- a/doc/qbk/3.3.path.qbk +++ b/doc/qbk/3.3.path.qbk @@ -11,22 +11,32 @@ [section Path] -The path contains data, usually organized hierarchically which is combined -with the ['query] (explained in the next section) to identify a resource -within the scope of the scheme and authority (if any). Most schemes -interpret the path as a sequence of slash delimited ['segments]. 
In -addition to interacting with the path as a single string, the library +[heading Notation] + +The path contains data, usually organized hierarchically, which is combined +with the [link section.query query] to identify a resource within the scope of +the scheme and authority. + +Most schemes interpret the path as a sequence of slash delimited ['segments]. +These segments can map to file system paths, which is useful for file servers, +but do not always need to imply this relationship. + +In addition to interacting with the path as a single string, the library provides container adaptors modeling ranges of individual path segments. -The URL below contains a path with three segments: + +The URL below contains a path `/path/to/file.txt` with the three segments +`path`, `to`, and `file.txt`: + +[teletype] ``` http://www.example.com/path/to/file.txt ``` Depending on the type of URL, there are various syntactic rules for how the -path may be formulated in a URL. The BNF for these formulations is defined -thusly: +path may be formulated in a URL. The BNF for these formulations is defined: [table Path BNF [[ +[teletype] ``` path = path-abempty ; begins with "/" or is empty / path-absolute ; begins with "/" but not "//" @@ -42,6 +52,8 @@ thusly: ``` ]]] +[heading Member Functions] + The functions for interacting with the path in a __url_view__ are as follows: [table Path Observers [ @@ -58,6 +70,240 @@ The functions for interacting with the path in a __url_view__ are as follows: [Return the path segments as a read-only container of strings with percent-decoding applied.] ]] +A URL path is usually interpreted as segments. The library +provides two read-only containers for interacting with the segments +in a URL's path: + +[table Segment View Types [ + [Type] + [Description] +][ + [[link url.ref.boost__urls__segments_encoded_view `segments_encoded_view`]] + [A read-only forward range of path segments returned as percent-encoded strings.] 
+][ + [[link url.ref.boost__urls__segments_view `segments_view`]] + [A read-only forward range of path segments returned as strings with percent-decoding applied.] +]] + +These views can be directly created by the parsing functions below. This provides +the guarantee that all constructed views contain valid path segments: + +[table Path Parsing Functions [ + [Function] + [Grammar] +][ + [[link url.ref.boost__urls__parse_path `parse_path`]] + [['any path]] +][ + [[link url.ref.boost__urls__parse_path_abempty `parse_path_abempty`]] + [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-abempty]]] +][ + [[link url.ref.boost__urls__parse_path_absolute `parse_path_absolute`]] + [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-absolute]]] +][ + [[link url.ref.boost__urls__parse_path_noscheme `parse_path_noscheme`]] + [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-noscheme]]] +][ + [[link url.ref.boost__urls__parse_path_rootless `parse_path_rootless`]] + [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-rootless]]] +]] + +[heading Observers] + +The function [link url.ref.boost__urls__url_view.encoded_path `encoded_path`] can +be used to obtain the path from a __url_view__: + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_path_1] +][ +[teletype] +``` + https://www.boost.org/doc/libs/ + path: /doc/libs/ + encoded segments: /doc/libs/ + segments: /doc/libs/ +``` +]]] + +These functions do not throw. There is no function analogous to `has_path` because +all URLs have valid paths, even when the path is empty. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_path_3] +][ +[teletype] +``` + https://www.boost.org + path: + encoded segments: + segments: +``` +]]] + +Notice that there is also no decoded counterpart for +[link url.ref.boost__urls__url_view.encoded_path `encoded_path`]. +The reason is any decoded character `/` could form an ambiguous +path segment. 
+ +[heading Segments View] + +These containers are lightweight references to the underlying path string. +Ownership of the string is not transferred; the caller is responsible for +ensuring that the lifetime of the string extends until the container is +destroyed. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_path_2] +][ +[teletype] +``` + 2 segments + segment: doc + segment: libs +``` +]]] + +In contexts where a path can appear by itself, such as HTTP requests, +segment views may not be constructed directly from strings. Instead, +we can use the analogous function +[link url.ref.boost__urls__parse_path `parse_path`] to obtain a +__segments_encoded_view__ or __segments_view__. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_path_9] +][ +[teletype] +``` + path: /doc/libs + 2 segments + segment: doc + segment: libs +``` +]]] + +[heading Use Cases] + +The path comes after the URL authority, including the initial slash `/`: + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/doc/libs/`] ] + [[Path] [`/doc/libs/`] ] +] + +In this example, the path has three segments: + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/doc/libs/`] ] + [[Segment 1] [`doc`] ] + [[Segment 2] [`libs`] ] + [[Segment 3] [(empty segment)] ] +] + +Note that the final slash in `/doc/libs/` implies an extra +empty segment that would not exist in the path `/doc/libs`: + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/doc/libs`] ] + [[Segment 1] [`doc`] ] + [[Segment 2] [`libs`] ] +] + +A URL always contains a path, even if it is empty: + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org`] ] + [[Path] [] ] +] + +Empty segments are also possible, resulting in consecutive slashes. 
+ +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org//doc///libs`] ] + [[Path] [`//doc///libs`] ] + [[Segment 1] [(empty)] ] + [[Segment 2] [`doc`] ] + [[Segment 3] [(empty)] ] + [[Segment 4] [(empty)] ] + [[Segment 5] [`libs`] ] +] + +If the authority is present, the path needs to be empty or start with a +slash `/`. + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org`] ] + [[Host] [`www.boost.org`] ] + [[Path] [] ] + [[Segments] [0] ] +] + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/`] ] + [[Host] [`www.boost.org`] ] + [[Path] [\/] ] + [[Segments] [0] ] +] + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org//`] ] + [[Host] [`www.boost.org`] ] + [[Path] [\//] ] + [[Segments] [2] ] +] + +A path might begin with two slashes to indicate its first segment is empty. + +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org//doc/libs/`] ] + [[Authority] [`www.boost.org`] ] + [[Path] [`//doc/libs/`] ] + [[Segment 1] [(empty)] ] + [[Segment 2] [`doc`] ] + [[Segment 3] [`libs`] ] + [[Segment 4] [(empty)] ] +] + +However, beginning the path with double slashes is not possible when the +authority is absent, as the first segment path would be interpreted as the +authority. + +[table + [[Component] [Value] ] + [[URL] [`https://doc/libs/`] ] + [[Authority] [`doc`] ] + [[Path] [`/libs/`] ] + [[Segment 1] [`libs`] ] + [[Segment 2] [(empty)] ] +] + +For this reason, paths beginning with two slashes are typically avoided +altogether. + +Of the reserved character set for URLs, `:` and `@` may appear unencoded within +paths. 
+ +[table + [[Component] [Value] ] + [[URL] [`https://www.boost.org/doc@folder/libs:boost`] ] + [[Authority] [`www.boost.org`] ] + [[Path] [`/doc@folder/libs:boost`] ] + [[Segment 1] [`doc@folder`] ] + [[Segment 2] [`libs:boost`] ] +] + [/-----------------------------------------------------------------------------] [endsect] diff --git a/doc/qbk/3.4.query.qbk b/doc/qbk/3.4.query.qbk index 5291e4c1..cda4b851 100644 --- a/doc/qbk/3.4.query.qbk +++ b/doc/qbk/3.4.query.qbk @@ -9,12 +9,32 @@ [/-----------------------------------------------------------------------------] -[section Query] +[#section.query] +[section:query Query] -The query component of a URL contains non-hierarchical data which augments the -information in the path to identify a resource within the scope of the URL's -scheme and authority (if any). A query is indicated by a leading question -mark ('?') character as seen in the BNF below: +[heading Notation] + +The query component of a URL augments the information in the path to identify +a resource within the scope of the URL's scheme and authority. Unlike the URL +path, the query string contains non-hierarchical data. + +Although there is no mandatory syntax for interpreting queries, its strings +are often interpreted as key-value parameters delimited by the '&' or ';' +character. In addition to interacting with the query as a single string, +the library provides container adaptors modeling ranges of individual query +parameters. 
+ +The URL below contains the query "[teletype]`?id=409&name=Joe&individual`" +with the three parameters "[teletype]`id=409`", "[teletype]`name=Joe`", and +"[teletype]`individual`": + +[teletype] +``` + https://www.example.com/get-customer.php?id=409&name=Joe&individual +``` + +A query is indicated by a leading question mark ('?') character as seen in +the BNF below: [table Query BNF [[ ``` @@ -30,30 +50,7 @@ mark ('?') character as seen in the BNF below: ``` ]]] -The most common formulation for the query in a URL is to define a set of -key and value pairs of percent-encoded strings, using the ampersand ('&') -character to delimit each pair after the first. In contexts where a query -is interpreted as key/value pairs, it is called the ['query parameters], -['query params], or just [*params]. In addition to accessor functions -which treat the query as a single string, this library provides -container adaptors modeling ranges of query parameters. The -following URL contains three query parameters: -``` - https://www.example.com/get-customer.php?id=409&name=Joe&individual -``` - -A query parameter has an optional key and an optional value. This means -that a query parameter may be completely empty. In this case the parameter -is said to have a zero-length or empty key, and no value. -The presence of a value is indicated by the presence of an equals ('=') -sign appearing after the key. The value may be zero length or empty, or -it may contain characters. 
The URL below demonstrate all the ways that -keys and values may appear in query parameters: -``` - ?key-1=value-1&key-2=&key-3&=value-2 -``` - -This table shows the BNF for query parameters: +This table shows the BNF for a query string interpreted as parameters: [table Query Params BNF [[ ``` @@ -72,28 +69,216 @@ This table shows the BNF for query parameters: ``` ]]] +[heading Member Functions] + The functions for interacting with the query in a __url_view__ are as follows: [table Query Observers [ [Function] [Description] -][ - [[link url.ref.boost__urls__url_view.encoded_params `encoded_params`]] - [Return the query parameters as a read-only container of percent-encoded strings.] -][ - [[link url.ref.boost__urls__url_view.encoded_query `encoded_query`]] - [Return the percent-encoded query.] ][ [[link url.ref.boost__urls__url_view.has_query `has_query`]] [Return true if a query is present] ][ - [[link url.ref.boost__urls__url_view.params `params`]] - [Return the query parameters as a read-only container of strings with percent-decoding applied.] + [[link url.ref.boost__urls__url_view.encoded_query `encoded_query`]] + [Return the percent-encoded query.] ][ [[link url.ref.boost__urls__url_view.query `query`]] [Return the query as a string with percent-decoding applied.] ]] +A URL query is usually interpreted as parameters. The library +provides two observers and read-only containers for interacting with the parameters +in a URL's query: + +[table Query Params Observers [ + [Function] + [Description] +][ + [[link url.ref.boost__urls__url_view.encoded_params `encoded_params`]] + [Return the query parameters as a read-only container of percent-encoded strings.] +][ + [[link url.ref.boost__urls__url_view.params `params`]] + [Return the query parameters as a read-only container of strings with percent-decoding applied.] 
+]] + +[table Params View Types [ + [Type] + [Description] +][ + [[link url.ref.boost__urls__params_encoded_view `params_encoded_view`]] + [A read-only forward range of query parameters returned as percent-encoded strings.] +][ + [[link url.ref.boost__urls__params_view `params_view`]] + [A read-only forward range of query parameters returned as strings with percent-decoding applied.] +]] + +[heading Observers] + +The function [link url.ref.boost__urls__url_view.encoded_query `encoded_query`] can +be used to obtain the query string from a __url_view__: + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_query_4] +][ +[teletype] +``` + https://www.example.com/get-customer.php?name=joe + encoded query: name=joe +``` +]]] + +These functions do not throw. If the URL has no query, +[link url.ref.boost__urls__url_view.encoded_query `encoded_query`] +returns an empty string. The function +[link url.ref.boost__urls__url_view.has_query `has_query`] +can be used to determine whether this empty string means there is +no query or an empty query string in the URL. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_query_5] +][ +[teletype] +``` + https://www.example.com/get-customer.php + has query: 0 + encoded query: +``` +]]] + +We can also use the function +[link url.ref.boost__urls__url_view.query `query`] +to obtain the decoded counterpart of the query string. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_query_6] +][ +[teletype] +``` + https://www.example.com/get-customer.php?name=John%20Doe + has query: 1 + encoded query: name=John%20Doe + query: name=John Doe +``` +]]] + +When using the query string as parameters, note that decoded +query strings might include ambiguous `&` and `=` characters. 
+ +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_query_7] +][ +[teletype] +``` + https://www.example.com/get-customer.php?name=John%26Doe + has query: 1 + encoded query: name=John%26Doe + query: name=John&Doe +``` +]]] + +In this example, the decoded query seems to imply there are +two query parameters while there is only one parameter whose +value includes a `&` character. + +The decoded variant of the query string is still provided +because query strings are not required to be interpreted as +query parameters, in which case the `&` character is not +ambiguous. + +[heading Parameter View] + +Parameter views are lightweight references to the underlying query string. +Ownership of the string is not transferred; the caller is responsible for +ensuring that the lifetime of the string extends until the container is +destroyed. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_query_1] +][ +[teletype] +``` + https://www.example.com/get-customer.php?id=409&name=Joe&individual + has query: 1 + encoded query: id=409&name=Joe&individual + query: id=409&name=Joe&individual + 3 parameters + parameter: + parameter: + parameter: individual +``` +]]] + +Each parameter is represented as a __query_param_view__ structure, with +fields to refer to the key and value. An extra field `has_value` is used +to indicate whether the value is absent. + +[heading Use Cases] + +The most common formulation for the query in a URL is to define a set of +key and value pairs of percent-encoded strings, using the ampersand ('&') +character to delimit each pair after the first. In contexts where a query +is interpreted as key/value pairs, it is called the ['query parameters], +['query params], or just [*params]. + +In addition to accessor functions which treat the query as a single string, +the library provides container adaptors modeling ranges of query parameters. 
+The following URL contains three query parameters: + +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/get-customer.php?id=409&name=Joe&individual`] ] + [[Has Query] [Yes] ] + [[Query] [`id=409&name=Joe&individual`] ] + [[Parameter 1] [Key `id`, Value `409`] ] + [[Parameter 2] [Key `name`, Value `Joe`] ] + [[Parameter 3] [Key `individual`, No value] ] +] + +Note that a parameter value might be either empty or absent. The +presence of a value is indicated by the presence of an equals ('=') +sign appearing after the key. This means the value may be absent, +empty, or contain characters. + +The key of a query parameter might also be empty. This means that +a query parameter may be completely empty. In this case the +parameter is said to have a zero-length or empty key, and +no value. + +The URL below demonstrates all the ways that keys and values may +appear in query parameters: + +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/get-customer.php?key-1=value-1&key-2=&key-3&&=value-5`] ] + [[Has Query] [Yes] ] + [[Query] [`key-1=value-1&key-2=&key-3&&=value-5`] ] + [[Parameter 1] [Key `key-1`, Value `value-1`] ] + [[Parameter 2] [Key `key-2`, Value (empty)] ] + [[Parameter 3] [Key `key-3`, No value] ] + [[Parameter 4] [Key (empty), No value] ] + [[Parameter 5] [Key (empty), Value `value-5`] ] +] + +The URL reserved characters `:`, `@`, `?`, and `/` may appear +unencoded within URL queries, as they are not ambiguous with +other URL components. 
+ +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/get-customer.php?email=joe@email.com&code=a:2@/!`] ] + [[Has Query] [Yes] ] + [[Query] [`email=joe@email.com&code=a:2@/!`] ] + [[Parameter 1] [Key `email`, Value `joe@email.com`] ] + [[Parameter 2] [Key `code`, Value `a:2@/!`] ] +] + + [/-----------------------------------------------------------------------------] [endsect] diff --git a/doc/qbk/3.5.fragment.qbk b/doc/qbk/3.5.fragment.qbk index 3b197d5e..bdb7990e 100644 --- a/doc/qbk/3.5.fragment.qbk +++ b/doc/qbk/3.5.fragment.qbk @@ -11,21 +11,35 @@ [section Fragment] +[heading Notation] + The fragment identifier in a URL provides further refinement of the specification of the resource, including additional identifying information. -The semantics of the fragment vary depending on the scheme, authority, path, +It provides directions to a secondary resource related to the main resource, +such as the section in an article or a time-point in a video. + +As usual, its semantics vary depending on the scheme, authority, path, and media type of the resource. In HTML, fragments are used as internal page references. This usage is called a "named anchor," referring to a -section within a web page. The URL below contains the fragment "section2": +section within a web page. + +The URL below contains the fragment "section2": + +[teletype] ``` https://www.example.com/index.html#section2 + \____/\_______________/\_________/\_______/ + scheme authority path fragment ``` A fragment appearing in a URL is always preceded by the number sign ('#'). A -URL with a fragment of zero length is distinguishable from a URL with no -fragment. The fragment grammar is defined as follows: +URL with a fragment of zero length is therefore distinguishable from a URL +with no fragment. + +The fragment grammar is defined as follows: [table Fragment BNF [[ +[teletype] ``` fragment = *( pchar / "/" / "?" ) @@ -37,22 +51,118 @@ fragment. 
The fragment grammar is defined as follows: ``` ]]] +[heading Member Functions] + The functions for inspecting the fragment in a __url_view__ are as follows: [table Fragment Observers[ [Function] [Description] +][ + [[link url.ref.boost__urls__url_view.has_fragment `has_fragment`]] + [Return true if a fragment is present] ][ [[link url.ref.boost__urls__url_view.encoded_fragment `encoded_fragment`]] [Return the fragment as a percent-encoded string.] ][ [[link url.ref.boost__urls__url_view.fragment `fragment`]] [Return the fragment as a string with percent-decoding applied.] -][ - [[link url.ref.boost__urls__url_view.has_fragment `has_fragment`]] - [Return true if a fragment is present] ]] +[heading Observers] + +Analogous to other components, the functions +[link url.ref.boost__urls__url_view.encoded_fragment `encoded_fragment`] +and [link url.ref.boost__urls__url_view.fragment `fragment`] +can be used to obtain the fragment from a __url_view__: + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_fragment_1] +][ +[teletype] +``` + https://www.example.com/index.html#section%202 + has fragment: 1 + encoded fragment: section%202 + fragment: section 2 +``` +]]] + +These functions do not throw. If the URL has no fragment, +[link url.ref.boost__urls__url_view.encoded_fragment `encoded_fragment`] +returns an empty string. The function +[link url.ref.boost__urls__url_view.has_fragment `has_fragment`] +can be used to determine whether this empty string means there is +no fragment or an empty fragment string in the URL. + +[table [[Code][Output]] [[ +[c++] +[snippet_parsing_fragment_2_a] +][ +[teletype] +``` + https://www.example.com/index.html# + has fragment: 1 + encoded fragment: + fragment: +``` +]] +[[ +[c++] +[snippet_parsing_fragment_2_b] +][ +[teletype] +``` + https://www.example.com/index.html + has fragment: 0 + encoded fragment: + fragment: +``` +]]] + +[heading Use Cases] + +URL fragments are usually interpreted as a single string. 
+ +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/index.html#section%202`] ] + [[Has Fragment] [Yes] ] + [[Encoded Fragment] [`section%202`] ] + [[Fragment] [`section 2`] ] +] + +The URL fragment might also be empty or absent. + +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/index.html#`] ] + [[Has Fragment] [Yes] ] + [[Encoded Fragment] [(empty)] ] + [[Fragment] [(empty)] ] +] + +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/index.html`] ] + [[Has Fragment] [No] ] + [[Encoded Fragment] [(No fragment)] ] + [[Fragment] [(No fragment)] ] +] + +The URL reserved characters `:`, `@`, `?`, and `/` may appear +unencoded with URL fragments, as they are not ambiguous with +other URL components. + +[table + [[Component] [Value] ] + [[URL] [`https://www.example.com/index.html#code%20:a@b?c/d`] ] + [[Has Fragment] [Yes] ] + [[Encoded Fragment] [`code%20:a@b?c/d`] ] + [[Fragment] [`code :a@b?c/d`] ] +] + [/-----------------------------------------------------------------------------] [endsect] diff --git a/doc/qbk/3.6.segment_views.qbk b/doc/qbk/3.6.segment_views.qbk deleted file mode 100644 index 87abd488..00000000 --- a/doc/qbk/3.6.segment_views.qbk +++ /dev/null @@ -1,58 +0,0 @@ -[/ - Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - - Official repository: https://github.com/CPPAlliance/url -] - -[section Segment Views] - -[/-----------------------------------------------------------------------------] - -The library provides two read-only containers for interacting with the segments -in a URL's path: - -[table Segment View Types [ - [Type] - [Description] -][ - [[link url.ref.boost__urls__segments_encoded_view `segments_encoded_view`]] - [A read-only forward range of path segments returned as percent-encoded strings.] 
-][ - [[link url.ref.boost__urls__segments_view `segments_view`]] - [A read-only forward range of path segments returned as strings with percent-decoding applied.] -]] - -These containers are lightweight references to the underlying path string. -Ownership of the string is not transferred; the caller is responsible for -ensuring that the lifetime of the string extends until the container is -destroyed. Containers may not be constructed directly from strings. -Instead, they are created by the parsing functions below. This provides -the guarantee that all constructed views contain valid path segments: - -[table Path Parsing Functions [ - [Function] - [Grammar] -][ - [[link url.ref.boost__urls__parse_path `parse_path`]] - [['any path]] -][ - [[link url.ref.boost__urls__parse_path_abempty `parse_path_abempty`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-abempty]]] -][ - [[link url.ref.boost__urls__parse_path_absolute `parse_path_absolute`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-absolute]]] -][ - [[link url.ref.boost__urls__parse_path_noscheme `parse_path_noscheme`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-noscheme]]] -][ - [[link url.ref.boost__urls__parse_path_rootless `parse_path_rootless`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 ['path-rootless]]] -]] - - -[/-----------------------------------------------------------------------------] - -[endsect] diff --git a/doc/qbk/3.7.params_views.qbk b/doc/qbk/3.7.params_views.qbk deleted file mode 100644 index f8e57a0f..00000000 --- a/doc/qbk/3.7.params_views.qbk +++ /dev/null @@ -1,27 +0,0 @@ -[/ - Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) - - Distributed under the Boost Software License, Version 1.0. 
(See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - - Official repository: https://github.com/CPPAlliance/url -] - -[/-----------------------------------------------------------------------------] - -[section Params Views] - -[table Params View Types [ - [Type] - [Description] -][ - [[link url.ref.boost__urls__params_encoded_view `params_encoded_view`]] - [A read-only forward range of query parameters returned as percent-encoded strings.] -][ - [[link url.ref.boost__urls__params_view `params_view`]] - [A read-only forward range of query parameters returned as strings with percent-decoding applied.] -]] - -[/-----------------------------------------------------------------------------] - -[endsect] diff --git a/doc/qbk/3.8.authority_view.qbk b/doc/qbk/3.8.authority_view.qbk deleted file mode 100644 index 16dfaa77..00000000 --- a/doc/qbk/3.8.authority_view.qbk +++ /dev/null @@ -1,36 +0,0 @@ -[/ - Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - - Official repository: https://github.com/CPPAlliance/url -] - -[section Authority View] - -[/-----------------------------------------------------------------------------] - -For contexts where an authority can appear by itself, the library provides the -__authority_view__, a read-only container to a non-owning character buffer -containing a valid authority. As an example, the grammar for the -[@https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1 ['request-target]] -of an HTTP/1 CONNECT request uses -[@https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 ['authority-form]]. -An authority view cannot be constructed from a string directly. Instead, it -is returned from the function -[link url.ref.boost__urls__parse_authority `parse_authority`]. 
- -The authority view provides the subset of observer member functions found in -__url_view__ which are relevant to the authority. However, when an authority -is parsed on its own, the leading double slashes ("//") are not present. The -following authority string is valid for `parse_authority`: - -[teletype] -``` - user:pass@www.example.com:443 -``` - -[/-----------------------------------------------------------------------------] - -[endsect] diff --git a/doc/qbk/4.0.Modification.qbk b/doc/qbk/4.0.Modification.qbk deleted file mode 100644 index 9178839d..00000000 --- a/doc/qbk/4.0.Modification.qbk +++ /dev/null @@ -1,112 +0,0 @@ -[/ - Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) - - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - - Official repository: https://github.com/CPPAlliance/url -] - - - -A URL string can be parsed using one of the parsing functions. -Each function parses according to a particular grammar specified -in __rfc3986__: - -[table Parsing Functions [ - [Function] - [Grammar] -][ - [[link url.ref.boost__urls__parse_absolute_uri `parse_absolute_uri`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.3 ['absolute-URI]]] -][ - [[link url.ref.boost__urls__parse_relative_ref `parse_relative_ref`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 ['relative-ref]]] -][ - [[link url.ref.boost__urls__parse_uri `parse_uri`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3 ['URI]]] -][ - [[link url.ref.boost__urls__parse_uri_reference `parse_uri_reference`]] - [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.1 ['URI-reference]]] -]] - -The collective grammars parsed by these algorithms are specified below. 
-To understand the syntax of the BNF specification, or to understand the -rest of the elements such as "scheme" below please refer to __rfc3986__: - -``` - absolute-URI = scheme ":" hier-part [ "?" query ] - - relative-ref = relative-part [ "?" query ] [ "#" fragment ] - - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - - URI-reference = URI / relative-ref - - hier-part = "//" authority path-abempty - / path-absolute - / path-rootless - / path-empty - - relative-part = "//" authority path-abempty - / path-absolute - / path-noscheme - / path-empty -``` - -Each of these functions accepts a __string_view__ and returns a __url_view__ -wrapped in a __result__ type. The following example parses a string literal -containing a URI: -``` -result< url_view > r = parse_uri( "https://www.example.com/path/to/file.txt" ); - -if( r.has_value() ) // parsing was successful -{ - url_view u = r.value(); // extract the url_view - - std::cout << u; // format the URL to cout -} -else -{ - std::cout << r.error().message(); // parsing failure; print error -} -``` - -The function throws nothing and returns the result in a variant-like container -which holds a __url_view__ or an __error_code__ in the case where the parsing -failed. Note that like a string view, the URL view does not own the underlying -character buffer. Instead, it references the string passed to the parsing -function. The caller is required to ensure that the lifetime of the string -extends until the view is destroyed. -A URL view containing a non-empty string cannot be constructed directly; -instead, it must be created using a parsing function. This guarantees that -any constructed view contains a syntactically valid URL already in its -serialized form. - -The function -[link url.ref.boost__urls__url_view.collect `url_view::collect`] -may be used to create a copy of the underlying character buffer and attach -ownership of the buffer to a newly returned view, which is wrapped in a -shared pointer. 
The following code calls `collect` to create a read-only -copy: -``` - // This will hold our copy - std::shared_ptr sp; - { - // result::value() will throw an exception if an error occurs - url_view u = parse_relative_ref( "/path/to/file.txt" ).value(); - - // create a copy with ownership and string lifetime extension - sp = u.collect(); - - // At this point the string literal goes out of scope - } - - // but `*sp` remains valid since it has its own copy - std::cout << *sp; -``` - -The interface of __url_view__ decomposes the URL into its individual parts and -allows for inspection of the various parts as well as returning metadata about -the URL itself. These non-modifying observer operations are described in the -sections that follow. diff --git a/doc/qbk/4.0.modifying.qbk b/doc/qbk/4.0.modifying.qbk new file mode 100644 index 00000000..1822e0a4 --- /dev/null +++ b/doc/qbk/4.0.modifying.qbk @@ -0,0 +1,124 @@ +[/ + Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/CPPAlliance/url +] + +[section Modifying URLs] + +The class __url__ is a container used to store and produce URLs. +The [link section.url_view URL parsing] functions can be used to create +a new container: + +[snippet_modifying_1] + +All __url_view__ observers are also available for a __url__: + +[snippet_modifying_2] + +The interface of __url_view__ decomposes the URL into its individual parts and +allows for inspection of the various parts as well as returning metadata about +the URL itself. These non-modifying observer operations are described in the +sections that follow. + +[heading Modifiers] + +For each observer function in __url_view__, an instance of __url__ provides a +corresponding `set` function to define the value of the specified component. 
+ +[table [[Component][Decoded][Encoded]] +[ + [authority] + [] + [[link url.ref.boost__urls__url.set_encoded_authority `set_encoded_authority`]] +] +[ + [fragment] + [[link url.ref.boost__urls__url.set_fragment `set_fragment`]] + [[link url.ref.boost__urls__url.set_encoded_fragment `set_encoded_fragment`]] +] +[ + [host] + [[link url.ref.boost__urls__url.set_host `set_host`]] + [[link url.ref.boost__urls__url.set_encoded_host `set_encoded_host`]] +] +[ + [password] + [[link url.ref.boost__urls__url.set_password `set_password`]] + [[link url.ref.boost__urls__url.set_encoded_password `set_encoded_password`]] +] +[ + [path] + [[link url.ref.boost__urls__url.set_path `set_path`]] + [[link url.ref.boost__urls__url.set_encoded_path `set_encoded_path`]] +] +[ + [path_absolute] + [[link url.ref.boost__urls__url.set_path_absolute `set_path_absolute`]] + [[link url.ref.boost__urls__url.set_path_absolute `set_path_absolute`]] +] +[ + [port] + [[link url.ref.boost__urls__url.set_port `set_port`]] + [[link url.ref.boost__urls__url.set_port `set_port`]] +] +[ + [query] + [[link url.ref.boost__urls__url.set_query `set_query`]] + [[link url.ref.boost__urls__url.set_encoded_query `set_encoded_query`]] +] +[ + [scheme] + [[link url.ref.boost__urls__url.set_scheme `set_scheme`]] + [[link url.ref.boost__urls__url.set_scheme `set_scheme`]] +] +[ + [user] + [[link url.ref.boost__urls__url.set_user `set_user`]] + [[link url.ref.boost__urls__url.set_encoded_user `set_encoded_user`]] +] +[ + [userinfo] + [[link url.ref.boost__urls__url.set_userinfo `set_userinfo`]] + [[link url.ref.boost__urls__url.set_encoded_userinfo `set_encoded_userinfo`]] +] +] + +[heading Encoded Modifiers] + +The encoded modifier functions require that the encoded strings are valid +for the specified component. + +[table [[Code][Output]] [[ +[c++] +[snippet_modifying_3] +][ +[teletype] +``` + http://www.example.com +``` +]]] + +If the input string contains an invalid value, an exception is thrown. 
+ +[snippet_modifying_4] + +[heading Decoded Modifiers] + +The decoded modifier functions will percent-encode any characters in +the input string that are invalid for the specified component: + +[table [[Code][Output]] [[ +[c++] +[snippet_modifying_5] +][ +[teletype] +``` + http://www.my%20example.com +``` +]]] + +[endsect] \ No newline at end of file diff --git a/doc/qbk/5.0.grammars.qbk b/doc/qbk/5.0.grammars.qbk index 35afde61..80fa54d9 100644 --- a/doc/qbk/5.0.grammars.qbk +++ b/doc/qbk/5.0.grammars.qbk @@ -8,7 +8,7 @@ Official repository: https://github.com/CPPAlliance/url ] -[section Grammars] +[section Grammar] [heading Design of grammar rules] @@ -26,4 +26,6 @@ grammar rules as part of the same architecture that might include arbitrary gram These new function overloads may be defined in other namespaces. As with __std_swap__, the design relies on [@https://en.cppreference.com/w/cpp/language/adl argument-dependent lookup] to find these overloads. +[include CharSet.qbk] + [endsect] diff --git a/doc/qbk/quickref.xml b/doc/qbk/quickref.xml index f0772a0d..b52d54d9 100644 --- a/doc/qbk/quickref.xml +++ b/doc/qbk/quickref.xml @@ -106,7 +106,7 @@ Concepts - CharSet + CharSet diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt deleted file mode 100644 index 8d9dca5f..00000000 --- a/example/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright (c) 2021 Alan Freitas (alandefreitas@gmail.com) -# -# Distributed under the Boost Software License, Version 1.0.
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -# -# Official repository: https://github.com/CppAlliance/url -# - -source_group("" FILES - quicklook.cpp -) - -# - -add_executable(quicklook - quicklook.cpp -) -set_property(TARGET quicklook PROPERTY FOLDER "example") -target_link_libraries(quicklook PRIVATE Boost::url) - -# diff --git a/example/Jamfile b/example/Jamfile deleted file mode 100644 index 6226db44..00000000 --- a/example/Jamfile +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com) -# -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -# -# Official repository: https://github.com/CppAlliance/url -# - -project : requirements /boost/url//boost_url ; - -exe quicklook : quicklook.cpp ; diff --git a/example/quicklook.cpp b/example/quicklook.cpp deleted file mode 100644 index e7662e52..00000000 --- a/example/quicklook.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// -// Official repository: https://github.com/CppAlliance/url -// - -#include -#include - -using namespace boost::urls; - -int -main(int, char**) -{ - result r = parse_uri( "https://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor" ); - if (r.has_error()) - { - std::cerr << r.error().what() << '\n'; - return EXIT_FAILURE; - } - - url_view u = r.value(); - std::cout << u << 'n'; - - std::cout << - "scheme : " << u.scheme() << '\n' << - "authority : " << u.encoded_authority() << '\n' << - "path : " << u.encoded_path() << '\n' << - "query : " << u.encoded_query() << '\n' << - "fragment : " << u.encoded_fragment() << '\n'; - - std::cout << - "query : " << u.query() << '\n' << - "fragment : " << u.fragment() << '\n'; - - { - segments_encoded_view segs = u.encoded_segments(); - for( auto v : segs ) - { - std::cout << v << "\n"; - } - } - - { - segments_view segs = u.segments(); - - for( auto v : segs ) - { - std::cout << v << "\n"; - } - } - - { - static_pool< 1024 > pool; - - segments_view segs = u.segments( pool.allocator() ); - - for( auto v : segs ) - { - std::cout << v << "\n"; - } - } - - { - params_encoded_view params = u.encoded_params(); - - for( auto v : params ) - { - std::cout << - "key = " << v.key << - ", value = " << v.value << "\n"; - } - } - - { - static_pool< 1024 > pool; - - params_view params = u.params( pool.allocator() ); - - for( auto v : params ) - { - std::cout << - "key = " << v.key << - ", value = " << v.value << "\n"; - } - } - - { - url v = parse_uri( "http://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor" ).value(); - v.set_scheme( "https" ); - v.set_scheme( scheme::https ); // equivalent to u.set_scheme( "https" ); - try - { - v.set_scheme( "100" ); // illegal, must start with a letter - } - catch( std::invalid_argument const& ) - { - // this happens - } - v.set_host( 
parse_ipv4_address( "192.168.0.1" ).value() ) - .set_port( 8080 ) - .remove_userinfo(); - - params p = v.params(); - p.emplace_at(p.find("name"), "name", "Vinnie Falco"); - std::cout << v << '\n'; - } - - return EXIT_SUCCESS; -} diff --git a/include/boost/url/const_string.hpp b/include/boost/url/const_string.hpp index a6866553..838724aa 100644 --- a/include/boost/url/const_string.hpp +++ b/include/boost/url/const_string.hpp @@ -43,18 +43,10 @@ namespace urls { */ class const_string : public string_view { -public: - /** Built-in capacity - - Strings whose length is up to - this size will not require - dynamic allocation. - */ static constexpr std::size_t builtin_capacity = 32; -private: struct base; struct result; diff --git a/include/boost/url/impl/segments_view.ipp b/include/boost/url/impl/segments_view.ipp index e8e6b4f5..00b0d140 100644 --- a/include/boost/url/impl/segments_view.ipp +++ b/include/boost/url/impl/segments_view.ipp @@ -207,14 +207,13 @@ operator<<( { auto it = vw.begin(); auto const end = vw.end(); - if(! vw.is_absolute()) - goto skip; - while(it != end) + if( it != end ) { - os << '/'; - skip: - auto s(*it++); - os << s; + if( vw.is_absolute() ) + os << "/"; + os << *it; + while( ++it != end ) + os << '/' << *it; } return os; } diff --git a/include/boost/url/params.hpp b/include/boost/url/params.hpp index c5a998e6..7acafe84 100644 --- a/include/boost/url/params.hpp +++ b/include/boost/url/params.hpp @@ -70,12 +70,8 @@ class params public: /** A random-access iterator referencing parameters in a url query. - This iterator lazily constructs instances of @ref params::reference, which - contain @ref const_string decoded representations of the current pair - in the underlying @ref url. - - The @ref params underlying allocator is used to construct the instances - of @ref params::reference. + Memory for allocated strings uses the allocator specified at construction + of the container. 
*/ #ifdef BOOST_URL_DOCS diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 232a6dc2..7be6e622 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -36,6 +36,7 @@ set(BOOST_URL_TESTS_FILES segments_encoded.cpp segments_encoded_view.cpp segments_view.cpp + snippets.cpp static_pool.cpp static_url.cpp const_string.cpp diff --git a/test/unit/Jamfile b/test/unit/Jamfile index beb65bf2..6dea12c2 100644 --- a/test/unit/Jamfile +++ b/test/unit/Jamfile @@ -40,6 +40,7 @@ local SOURCES = segments_encoded.cpp segments_encoded_view.cpp segments_view.cpp + snippets.cpp static_pool.cpp static_url.cpp const_string.cpp diff --git a/test/unit/const_string.cpp b/test/unit/const_string.cpp index fe671410..1bf4b6ef 100644 --- a/test/unit/const_string.cpp +++ b/test/unit/const_string.cpp @@ -24,12 +24,11 @@ struct const_string_test string_view big_; string_view big2_; string_view small_; - static constexpr std::size_t C = - const_string::builtin_capacity; + static constexpr std::size_t C = 32; const_string_test() noexcept { - string_view cs = + string_view cs = "*" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; diff --git a/test/unit/segments_view.cpp b/test/unit/segments_view.cpp index fb7b78b9..6978a1ee 100644 --- a/test/unit/segments_view.cpp +++ b/test/unit/segments_view.cpp @@ -390,6 +390,15 @@ public: void testOutput() { + { + // empty + std::stringstream ss; + segments_view sv = parse_path( + "").value().decoded(); + BOOST_TEST(!sv.is_absolute()); + ss << sv; + BOOST_TEST(ss.str() == ""); + } { // absolute std::stringstream ss; @@ -397,7 +406,8 @@ public: "/%70%61%74%68/%74%6f/%66%69%6c%65%2e%74%78%74" ).value().decoded(); ss << sv; - BOOST_TEST(ss.str() == "/path/to/file.txt"); + auto str = ss.str(); + BOOST_TEST(str == "/path/to/file.txt"); } { // relative diff --git a/test/unit/snippets.cpp b/test/unit/snippets.cpp new file mode 100644 index 00000000..4011cfa2 --- /dev/null +++ b/test/unit/snippets.cpp @@ -0,0 +1,885 @@ +// 
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/CPPAlliance/url +// + +#include "test_suite.hpp" + +//[snippet_headers_1 +#include +//] + +#if 0 +//[snippet_headers_2 +#include +//] +#endif + +#include + +//[snippet_headers_3 +#include +using namespace boost::urls; +//] + +void +using_url_views() +{ + //[snippet_parsing_1 + string_view s = "https://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor"; + //] + + { + //[snippet_parsing_2 + result r = parse_uri( s ); + //] + } + + { + //[snippet_parsing_3 + url_view u = parse_uri( s ).value(); + //] + } + + { + //[snippet_parsing_4 + try + { + url_view u = parse_uri(s).value(); + } + catch (std::invalid_argument const&) + { + // handle error + } + //] + } + + { + //[snippet_parsing_5 + result r = parse_uri( s ); + if (r.has_value()) + { + url_view u = r.value(); + } + else + { + // handle error + } + //] + } + + url_view u = parse_uri( s ).value(); + + //[snippet_accessing_1 + std::cout << + "scheme : " << u.scheme() << "\n" + "authority : " << u.encoded_authority() << "\n" + "path : " << u.encoded_path() << "\n" + "query : " << u.encoded_query() << "\n" + "fragment : " << u.encoded_fragment() << "\n"; + //] + + { + //[snippet_accessing_2 + url_view u1 = parse_uri( "http://www.example.com" ).value(); + std::cout << "fragment 1 : " << u1.encoded_fragment() << "\n\n"; + + url_view u2 = parse_uri( "http://www.example.com/#" ).value(); + std::cout << "fragment 2 : " << u2.encoded_fragment() << "\n\n"; + //] + } + + { + //[snippet_accessing_3 + url_view u1 = parse_uri( "http://www.example.com" ).value(); + std::cout << "has fragment 1 : " << u1.has_fragment() << "\n"; + std::cout << "fragment 1 : " << u1.encoded_fragment() << "\n\n"; + + url_view u2 = parse_uri( 
"http://www.example.com/#" ).value(); + std::cout << "has fragment 2 : " << u2.has_fragment() << "\n"; + std::cout << "fragment 2 : " << u2.encoded_fragment() << "\n\n"; + //] + } + + //[snippet_decoding_1 + std::cout << + "query : " << u.query() << "\n" + "fragment : " << u.fragment() << "\n"; + //] + + { + //[snippet_allocators_1 + static_pool< 1024 > sp; + std::cout << + "query : " << u.query(sp.allocator()) << "\n" + "fragment : " << u.fragment(sp.allocator()) << "\n"; + //] + } + + { + //[snippet_compound_elements_1 + segments_encoded_view segs = u.encoded_segments(); + for( auto v : segs ) + { + std::cout << v << "\n"; + } + //] + } + + { + //[snippet_encoded_compound_elements_1 + segments_view segs = u.segments(); + + for( auto v : segs ) + { + std::cout << v << "\n"; + } + //] + } + + { + //[snippet_encoded_compound_elements_2 + static_pool< 1024 > pool; + + segments_view segs = u.segments( pool.allocator() ); + + for( auto v : segs ) + { + std::cout << v << "\n"; + } + //] + } + + { + //[snippet_encoded_compound_elements_3 + params_encoded_view params = u.encoded_params(); + + for( auto v : params ) + { + std::cout << + "key = " << v.key << + ", value = " << v.value << "\n"; + } + //] + } + + { + //[snippet_encoded_compound_elements_4 + static_pool< 1024 > pool; + + params_view params = u.params( pool.allocator() ); + + for( auto v : params ) + { + std::cout << + "key = " << v.key << + ", value = " << v.value << "\n"; + } + //] + } +} + +void +using_urls() +{ + string_view s = "https://user:pass@www.example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor"; + + //[snippet_modification_1 + url u = parse_uri( s ).value(); + //] + + //[snippet_modification_2 + u.set_scheme( "https" ); + //] + + //[snippet_modification_3 + u.set_scheme( scheme::https ); // equivalent to u.set_scheme( "https" ); + //] + + //[snippet_modification_4 + try + { + u.set_scheme( "100" ); // illegal, must start with a letter + } + catch( std::invalid_argument const& ) 
+    {
+        // handle error
+    }
+    //]
+
+    //[snippet_modification_5
+    u.set_host( parse_ipv4_address( "192.168.0.1" ).value() )
+        .set_port( 8080 )
+        .remove_userinfo();
+    //]
+
+    //[snippet_modification_6
+    params p = u.params();
+    p.emplace_at(p.find("name"), "name", "Vinnie Falco");
+    std::cout << u << "\n";
+    //]
+}
+
+// Snippets that parse a string into a url_view, check the result, and
+// keep the parsed URL usable after the source string has gone out of
+// scope (via collect() or by assigning to a url).
+void
+parsing_urls()
+{
+    //[snippet_parsing_url_1
+    result< url_view > r = parse_uri( "https://www.example.com/path/to/file.txt" );
+
+    if( r.has_value() )                    // parsing was successful
+    {
+        url_view u = r.value();            // extract the url_view
+
+        std::cout << u;                    // format the URL to cout
+    }
+    else
+    {
+        std::cout << r.error().message();  // parsing failure; print error
+    }
+    //]
+
+    //[snippet_parsing_url_2
+    // This will hold our copy
+    std::shared_ptr<url_view const> sp;
+    {
+        std::string s = "/path/to/file.txt";
+
+        // result::value() will throw an exception if an error occurs
+        url_view u = parse_relative_ref( s ).value();
+
+        // create a copy with ownership and string lifetime extension
+        sp = u.collect();
+
+        // At this point the string goes out of scope
+    }
+
+    // but `*sp` remains valid since it has its own copy
+    std::cout << *sp << "\n";
+    //]
+
+    {
+        //[snippet_parsing_url_3
+        // This will hold our mutable copy
+        url v;
+        {
+            std::string s = "/path/to/file.txt";
+
+            // result::value() will throw an exception if an error occurs
+            v = parse_relative_ref(s).value();
+
+            // At this point the string goes out of scope
+        }
+
+        // but `v` remains valid since it has its own copy
+        std::cout << v << "\n";
+
+        // and it's mutable
+        v.set_encoded_fragment("anchor");
+
+        std::cout << v << "\n";
+        //]
+    }
+}
+
+// Snippets that read the scheme of a parsed URL, both as a string
+// (scheme()) and as an enumerated id (scheme_id()).
+void
+parsing_scheme()
+{
+    {
+        //[snippet_parsing_scheme_1
+        string_view s = "mailto:name@email.com";
+        url_view u = parse_uri( s ).value();
+        std::cout << u.scheme() << "\n";
+        //]
+    }
+    {
+        string_view s = "mailto:name@email.com";
+        //[snippet_parsing_scheme_2
+        url_view u = parse_uri( s ).value();
+        if (u.has_scheme())
+        {
+            std::cout << u.scheme() << "\n";
+        }
+        //]
+    }
+    {
+        //[snippet_parsing_scheme_3
+        string_view s = "file://host/path/to/file";
+        url_view u = parse_uri( s ).value();
+        if (u.scheme_id() == scheme::file)
+        {
+            // handle file
+        }
+        //]
+    }
+}
+
+// Snippets that print the authority of several URLs and its parts
+// (userinfo, user, password, host, port), dispatch on host_type(), and
+// parse stand-alone authority strings with parse_authority().
+void
+parsing_authority()
+{
+    {
+        //[snippet_parsing_authority_1
+        string_view s = "https:///path/to_resource";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n"
+            "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_2
+        string_view s = "https://www.boost.org";
+        url_view u = parse_uri( s ).value();
+        std::cout << "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_3
+        string_view s = "https://www.boost.org/users/download/";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n"
+            "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_4
+        string_view s = "https://www.boost.org/";
+        url_view u = parse_uri( s ).value();
+        std::cout << "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_5
+        string_view s = "mailto:John.Doe@example.com";
+        url_view u = parse_uri( s ).value();
+        std::cout << "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_6
+        string_view s = "mailto://John.Doe@example.com";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n"
+            "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_7
+        string_view s = "https://john.doe@www.example.com:123/forum/questions/";
+        url_view u = parse_uri( s ).value();
+        std::cout << "scheme: " << u.scheme() << "\n"
+            "has authority: " << u.has_authority() << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "host: " << u.encoded_host() << "\n"
+            "userinfo: " << u.encoded_userinfo() << "\n"
+            "port: " << u.port() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_8
+        string_view s = "https://john.doe@www.example.com:123/forum/questions/";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n"
+            "encoded host: " << u.encoded_host() << "\n"
+            "host: " << u.host() << "\n"
+            "host and port: " << u.encoded_host_and_port() << "\n"
+            "port: " << u.port() << "\n"
+            "port number: " << u.port_number() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_9
+        string_view s = "https://john.doe@192.168.2.1:123/forum/questions/";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n"
+            "encoded host: " << u.encoded_host() << "\n"
+            "host: " << u.host() << "\n"
+            "host and port: " << u.encoded_host_and_port() << "\n"
+            "port: " << u.port() << "\n"
+            "port number: " << u.port_number() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_10
+        string_view s = "https://www.boost.org/users/download/";
+        url_view u = parse_uri( s ).value();
+        switch (u.host_type())
+        {
+        case host_type::name:
+            // resolve name
+        case host_type::ipv4:
+        case host_type::ipv6:
+        case host_type::ipvfuture:
+            // connect to ip
+            break;
+        case host_type::none:
+            // handle empty host URL
+            break;
+        }
+        //]
+    }
+    {
+        //[snippet_parsing_authority_11
+        string_view s = "https://john.doe:123456@www.somehost.com/forum/questions/";
+        url_view u = parse_uri( s ).value();
+        std::cout << u << "\n\n"
+            // userinfo
+            "has_userinfo: " << u.has_userinfo() << "\n"
+            "encoded_userinfo: " << u.encoded_userinfo() << "\n"
+            "userinfo: " << u.userinfo() << "\n\n"
+            // user
+            "encoded_user: " << u.encoded_user() << "\n"
+            "user: " << u.user() << "\n\n"
+            // password
+            "has_password: " << u.has_password() << "\n"
+            "encoded_password: " << u.encoded_password() << "\n"
+            "password: " << u.password() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_12
+        string_view s = "www.example.com:80";
+        authority_view a = parse_authority( s ).value();
+        std::cout << a << "\n\n"
+            // host and port
+            "encoded_host_and_port: " << a.encoded_host_and_port() << "\n"
+            "encoded_host: " << a.encoded_host() << "\n"
+            "host: " << a.host() << "\n"
+            "port: " << a.port() << "\n"
+            "port number: " << a.port_number() << "\n\n"
+            // userinfo
+            "has_userinfo: " << a.has_userinfo() << "\n"
+            "encoded_userinfo: " << a.encoded_userinfo() << "\n"
+            "userinfo: " << a.userinfo() << "\n\n"
+            // user
+            "encoded_user: " << a.encoded_user() << "\n"
+            "user: " << a.user() << "\n\n"
+            // password
+            "has_password: " << a.has_password() << "\n"
+            "encoded_password: " << a.encoded_password() << "\n"
+            "password: " << a.password() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_authority_13
+        string_view s = "user:pass@www.example.com:443";
+        authority_view a = parse_authority( s ).value();
+        std::cout << a << "\n\n"
+            // host and port
+            "encoded_host_and_port: " << a.encoded_host_and_port() << "\n"
+            "encoded_host: " << a.encoded_host() << "\n"
+            "host: " << a.host() << "\n"
+            "port: " << a.port() << "\n"
+            "port number: " << a.port_number() << "\n\n"
+            // userinfo
+            "has_userinfo: " << a.has_userinfo() << "\n"
+            "encoded_userinfo: " << a.encoded_userinfo() << "\n"
+            "userinfo: " << a.userinfo() << "\n\n"
+            // user
+            "encoded_user: " << a.encoded_user() << "\n"
+            "user: " << a.user() << "\n\n"
+            // password
+            "has_password: " << a.has_password() << "\n"
+            "encoded_password: " << a.encoded_password() << "\n"
+            "password: " << a.password() << "\n";
+        //]
+    }
+}
+
+// Snippets that print the path of several URLs, count and iterate the
+// encoded segments, and parse a stand-alone path with parse_path().
+void
+parsing_path()
+{
+    {
+        //[snippet_parsing_path_1
+        string_view s = "https://www.boost.org/doc/libs/";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            << "path: " << u.encoded_path() << "\n"
+            << "encoded segments: " << u.encoded_segments() << "\n"
+            << "segments: " << u.segments() << "\n";
+        //]
+
+        //[snippet_parsing_path_1_b
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_2
+        string_view s = "https://www.boost.org/doc/libs";
+        url_view u = parse_uri(s).value();
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_3
+        string_view s = "https://www.boost.org";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            << "path: " << u.encoded_path() << "\n"
+            << "encoded segments: " << u.encoded_segments() << "\n"
+            << "segments: " << u.segments() << "\n";
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_4
+        string_view s = "https://www.boost.org//doc///libs";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "path: " << u.encoded_path() << "\n"
+            "encoded segments: " << u.encoded_segments() << "\n"
+            "segments: " << u.segments() << "\n";
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        {
+            //[snippet_parsing_path_5_a
+            string_view s = "https://www.boost.org";
+            url_view u = parse_uri(s).value();
+            std::cout << u << "\n"
+                << "host: " << u.encoded_host() << "\n"
+                << "path: " << u.encoded_path() << "\n"
+                << "segments: " << u.encoded_segments().size() << "\n";
+            //]
+        }
+        {
+            //[snippet_parsing_path_5_b
+            string_view s = "https://www.boost.org/";
+            url_view u = parse_uri(s).value();
+            std::cout << u << "\n"
+                << "host: " << u.encoded_host() << "\n"
+                << "path: " << u.encoded_path() << "\n"
+                << "segments: " << u.encoded_segments().size() << "\n";
+            //]
+        }
+        {
+            //[snippet_parsing_path_5_c
+            string_view s = "https://www.boost.org//";
+            url_view u = parse_uri(s).value();
+            std::cout << u << "\n"
+                << "host: " << u.encoded_host() << "\n"
+                << "path: " << u.encoded_path() << "\n"
+                << "segments: " << u.encoded_segments().size() << "\n";
+            //]
+        }
+    }
+
+    {
+        //[snippet_parsing_path_6
+        string_view s = "https://www.boost.org//doc/libs/";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_7
+        string_view s = "https://doc/libs/";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_8
+        string_view s = "https://www.boost.org/doc@folder/libs:boost";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "authority: " << u.encoded_authority() << "\n"
+            "path: " << u.encoded_path() << "\n";
+        std::cout << u.encoded_segments().size() << " segments\n";
+        for (auto seg: u.encoded_segments())
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+
+    {
+        //[snippet_parsing_path_9
+        string_view s = "/doc/libs";
+        segments_encoded_view p = parse_path(s).value();
+        std::cout << "path: " << p << "\n";
+        std::cout << p.size() << " segments\n";
+        for (auto seg: p)
+        {
+            std::cout << "segment: " << seg << "\n";
+        }
+        //]
+    }
+}
+
+// Snippets that print the query of several URLs (encoded and decoded)
+// and iterate the encoded parameters, distinguishing parameters that
+// carry a value from those that are a bare key.
+void
+parsing_query()
+{
+    {
+        //[snippet_parsing_query_1
+        string_view s = "https://www.example.com/get-customer.php?id=409&name=Joe&individual";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n"
+            "query: " << u.query() << "\n";
+        std::cout << u.encoded_params().size() << " parameters\n";
+        for (auto p: u.encoded_params())
+        {
+            if (p.has_value)
+            {
+                std::cout <<
+                    "parameter: <" << p.key <<
+                    ", " << p.value << ">\n";
+            } else {
+                std::cout << "parameter: " << p.key << "\n";
+            }
+        }
+        //]
+    }
+    {
+        //[snippet_parsing_query_2
+        string_view s = "https://www.example.com/get-customer.php?key-1=value-1&key-2=&key-3&&=value-2";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n"
+            "query: " << u.query() << "\n";
+        std::cout << u.encoded_params().size() << " parameters\n";
+        for (auto p: u.encoded_params())
+        {
+            if (p.has_value)
+            {
+                std::cout <<
+                    "parameter: <" << p.key <<
+                    ", " << p.value << ">\n";
+            } else {
+                std::cout << "parameter: " << p.key << "\n";
+            }
+        }
+        //]
+    }
+    {
+        //[snippet_parsing_query_3
+        string_view s = "https://www.example.com/get-customer.php?email=joe@email.com&code=a:2@/!";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n"
+            "query: " << u.query() << "\n";
+        std::cout << u.encoded_params().size() << " parameters\n";
+        for (auto p: u.encoded_params())
+        {
+            if (p.has_value)
+            {
+                std::cout <<
+                    "parameter: <" << p.key <<
+                    ", " << p.value << ">\n";
+            } else {
+                std::cout << "parameter: " << p.key << "\n";
+            }
+        }
+        //]
+    }
+    {
+        //[snippet_parsing_query_4
+        string_view s = "https://www.example.com/get-customer.php?name=joe";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "encoded query: " << u.encoded_query() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_query_5
+        string_view s = "https://www.example.com/get-customer.php";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_query_6
+        string_view s = "https://www.example.com/get-customer.php?name=John%20Doe";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n"
+            "query: " << u.query() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_query_7
+        string_view s = "https://www.example.com/get-customer.php?name=John%26Doe";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has query: " << u.has_query() << "\n"
+            "encoded query: " << u.encoded_query() << "\n"
+            "query: " << u.query() << "\n";
+        //]
+    }
+}
+
+// Snippets that print the fragment of several URLs (present, empty,
+// absent, and containing reserved characters), encoded and decoded.
+void
+parsing_fragment()
+{
+    {
+        //[snippet_parsing_fragment_1
+        string_view s = "https://www.example.com/index.html#section%202";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has fragment: " << u.has_fragment() << "\n"
+            "encoded fragment: " << u.encoded_fragment() << "\n"
+            "fragment: " << u.fragment() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_fragment_2_a
+        string_view s = "https://www.example.com/index.html#";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has fragment: " << u.has_fragment() << "\n"
+            "encoded fragment: " << u.encoded_fragment() << "\n"
+            "fragment: " << u.fragment() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_fragment_2_b
+        string_view s = "https://www.example.com/index.html";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has fragment: " << u.has_fragment() << "\n"
+            "encoded fragment: " << u.encoded_fragment() << "\n"
+            "fragment: " << u.fragment() << "\n";
+        //]
+    }
+    {
+        //[snippet_parsing_fragment_3
+        string_view s = "https://www.example.com/index.html#code%20:a@b?c/d";
+        url_view u = parse_uri(s).value();
+        std::cout << u << "\n"
+            "has fragment: " << u.has_fragment() << "\n"
+            "encoded fragment: " << u.encoded_fragment() << "\n"
+            "fragment: " << u.fragment() << "\n";
+        //]
+    }
+}
+
+// Snippets that copy a parsed url_view into a mutable url, print its
+// parts, and then change its scheme and host.
+void
+using_modifying()
+{
+    {
+        //[snippet_modifying_1
+        string_view s = "https://www.example.com";
+        url_view u = parse_uri(s).value();
+        url v(u);
+        //]
+
+        //[snippet_modifying_2
+        std::cout << v << "\n"
+            "scheme: " << v.scheme() << "\n"
+            "has authority: " << v.has_authority() << "\n"
+            "authority: " << v.encoded_authority() << "\n"
+            "path: " << v.encoded_path() << "\n";
+        //]
+
+        //[snippet_modifying_3
+        v.set_scheme("http");
+        std::cout << v << "\n";
+        //]
+
+        // NOTE(review): the call inside this try block is the same
+        // set_scheme("http") that snippet_modifying_3 makes without a
+        // handler, so the catch is presumably never reached -- confirm
+        // whether an invalid scheme was intended here.
+        //[snippet_modifying_4
+        try
+        {
+            v.set_scheme("http");
+        }
+        catch( std::exception const& )
+        {
+            // handle error
+        }
+        //]
+
+        //[snippet_modifying_5
+        v.set_host("www.my example.com");
+        std::cout << v << "\n";
+        //]
+
+
+    }
+}
+
+namespace boost {
+namespace urls {
+
+// Test suite wrapper for the documentation snippets: run() calls every
+// snippet function in this file once and then records a pass.
+class snippets_test
+{
+public:
+    void
+    run()
+    {
+        using_url_views();
+        using_urls();
+        parsing_urls();
+        parsing_scheme();
+        parsing_authority();
+        parsing_path();
+        parsing_query();
+        parsing_fragment();
+        using_modifying();
+
+        BOOST_TEST_PASS();
+    }
+};
+
+TEST_SUITE(snippets_test, "boost.url.snippets");
+
+} // urls
+} // boost