diff --git a/doc/modules/ROOT/images/AuthorityDiagram.svg b/doc/modules/ROOT/images/AuthorityDiagram.svg new file mode 100644 index 00000000..2c5ab605 --- /dev/null +++ b/doc/modules/ROOT/images/AuthorityDiagram.svg @@ -0,0 +1,154 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://user:pass@www.example.com:80 + + + + + + + + + + + + + + + + + + userinfo + + + + + + host + + + + + + port + + + + + + + + \ No newline at end of file diff --git a/doc/modules/ROOT/images/ClassHierarchy.svg b/doc/modules/ROOT/images/ClassHierarchy.svg new file mode 100644 index 00000000..add6300c --- /dev/null +++ b/doc/modules/ROOT/images/ClassHierarchy.svg @@ -0,0 +1,152 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + url_view_base + + + + + + + + url_base + + + + + + + + url + + + + + + + + static_url + + + + + + + + url_view + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/modules/ROOT/images/HelpCard.svg b/doc/modules/ROOT/images/HelpCard.svg new file mode 100644 index 00000000..019a2858 --- /dev/null +++ b/doc/modules/ROOT/images/HelpCard.svgsyntax + + + + + + + :// + + + + + + + url, url_view + + + + + + + + httpsscheme + + + + + + + + + + + + + Boost + + + + + + + Boost.URL Reference Card + + + + + + + + Pathis_path_absoluteencoded_pathencoded_segmentssegmentsset_path_absoluteset_encoded_pathnormalize_path + + + + + + + + Userinfo has_userinfohas_passwordpassworduseruserinfoencoded_passwordencoded_userencoded_userinfoset_passwordset_userset_userinfoset_encoded_passwordset_encoded_userset_encoded_userinforemove_userinforemove_password + + + + + + + + 
Fragmenthas_fragmentencoded_fragmentfragmentset_fragmentset_encoded_fragmentremove_fragmentnormalize_fragment + + + + + + + + Queryhas_queryparamsqueryencoded_paramsencoded_queryset_queryset_encoded_queryremove_querynormalize_query + + + + + + + + Host and Port encoded_hostencoded_host_addressencoded_host_namehosthost_addresshost_ipv4_addresshost_ipv6_addresshost_ipvfuturehost_namehost_typehas_portportport_numberencoded_host_and_portset_encoded_hostset_encoded_host_addressset_encoded_host_nameset_hostset_host_addressset_host_ipv4set_host_ipv6set_host_ipvfutureset_host_nameset_portremove_port + + + + + + + + Scheme schemescheme_idhas_schemeset_schemeset_scheme_id remove_schemenormalize_scheme + + + + + + + + Authorityauthorityhas_authorityencoded_authorityremove_authorityset_encoded_authority + + + + + + + + Other Partsencoded_originencoded_resourceencoded_targetremove_origin + + + + + + + Other + + + + + + + + Pathsegments_encoded_refsegments_encoded_viewsegments_refsegments_viewparse_pathsegments_basesegments_encoded_base + + + + + + + + URL Parsingparse_absolute_uriparse_authorityparse_origin_formparse_relative_refparse_uriparse_uri_reference + + + + + + + + Percent-Encodingencodeencoded_sizemake_pct_string_viewdecode_viewencoding_optspct_string_view + + + + + + + @ + + + + + + + + user:passuserinfo + + + + + + + ? 
+ + + + + + + + /path/to/file.txtpath + + + + + + + + www.example.com:80host and port + id34 + + + + + + + # + + + + + + + + cn=text&cachedquery + + + + + + + + morefragment + + + + + + + + Queryparams_encoded_viewparams_encoded_refparams_viewparams_refparamparam_pct_viewparam_viewparse_queryparams_baseparams_encoded_base + + + + + + + authority + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/modules/ROOT/images/PartsDiagram.svg b/doc/modules/ROOT/images/PartsDiagram.svg new file mode 100644 index 00000000..02634734 --- /dev/null +++ b/doc/modules/ROOT/images/PartsDiagram.svg @@ -0,0 +1,145 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #” fragment + + + + + + + + ?” query + + + + + + + + path + + + + + + + + scheme “:” + + + + + + + + //” authority + + + + + + + + \ No newline at end of file diff --git a/doc/modules/ROOT/images/repo-logo.png b/doc/modules/ROOT/images/repo-logo.png new file mode 100644 index 00000000..192ec262 Binary files /dev/null and b/doc/modules/ROOT/images/repo-logo.png differ diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc index c29c00e3..f0472b57 100644 --- a/doc/modules/ROOT/nav.adoc +++ b/doc/modules/ROOT/nav.adoc @@ -1,22 +1,29 @@ -* xref:quicklook.adoc[Quick Look] -* xref:urls/index.adoc[URLs] -** xref:urls/parsing.adoc[Parsing] -** xref:urls/containers.adoc[Containers] -** xref:urls/segments.adoc[Segments] -** xref:urls/params.adoc[Params] -** xref:urls/normalization.adoc[Normalization] -** xref:urls/stringtoken.adoc[String Token] -** xref:urls/percent-encoding.adoc[Percent Encoding] -** xref:urls/formatting.adoc[Formatting] -* xref:grammar/index.adoc[Grammar] -** xref:grammar/rules.adoc[Parse Rules] -** xref:grammar/charset.adoc[Character Sets] -** xref:grammar/combinators.adoc[Compound Rules] -** 
xref:grammar/range.adoc[Ranges] -** xref:grammar/rfc3986.adoc[RFC 3986] -* xref:concepts/index.adoc[Concepts] -** xref:concepts/CharSet.adoc[CharSet] -** xref:concepts/Rule.adoc[Rule] -** xref:concepts/StringToken.adoc[StringToken] -* xref:examples.adoc[Examples] -* xref:HelpCard.adoc[Help Card] \ No newline at end of file +* xref:quicklook.adoc[] +* xref:urls/index.adoc[] +** xref:urls/parsing.adoc[] +** xref:urls/containers.adoc[] +** xref:urls/segments.adoc[] +** xref:urls/params.adoc[] +** xref:urls/normalization.adoc[] +** xref:urls/stringtoken.adoc[] +** xref:urls/percent-encoding.adoc[] +** xref:urls/formatting.adoc[] +* xref:grammar/index.adoc[] +** xref:grammar/rules.adoc[] +** xref:grammar/charset.adoc[] +** xref:grammar/combinators.adoc[] +** xref:grammar/range.adoc[] +** xref:grammar/rfc3986.adoc[] +* Concepts +** xref:concepts/CharSet.adoc[] +** xref:concepts/Rule.adoc[] +** xref:concepts/StringToken.adoc[] +* Examples +** xref:examples/qrcode.adoc[] +** xref:examples/finicky.adoc[] +** xref:examples/mailto.adoc[] +** xref:examples/magnet-link.adoc[] +** xref:examples/file-router.adoc[] +** xref:examples/router.adoc[] +** xref:examples/sanitize.adoc[] +* xref:HelpCard.adoc[] \ No newline at end of file diff --git a/doc/modules/ROOT/pages/HelpCard.adoc b/doc/modules/ROOT/pages/HelpCard.adoc index a4924747..a5792746 100644 --- a/doc/modules/ROOT/pages/HelpCard.adoc +++ b/doc/modules/ROOT/pages/HelpCard.adoc @@ -8,8 +8,9 @@ // -// [section:helpcard Help Card] +[#helpcard] += Help Card -// [$url/images/HelpCard.svg] +image:HelpCard.svg[] diff --git a/doc/modules/ROOT/pages/clipboard.adoc b/doc/modules/ROOT/pages/clipboard.adoc index b44de221..df33b2b0 100644 --- a/doc/modules/ROOT/pages/clipboard.adoc +++ b/doc/modules/ROOT/pages/clipboard.adoc @@ -16,125 +16,211 @@ These member functions are available for interacting with the scheme component of URL containers: -// [table Scheme Members [ -// [Name] -// [Description] -// ][ -// [[link 
url.ref.boost__urls__url_view_base.has_scheme `has_scheme`]] -// [ -// Return `true` if a scheme is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.scheme `scheme`]] -// [ -// Return the scheme as a string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.scheme_id `scheme_id`]] -// [ -// Return the scheme as a -// [link url.ref.boost__urls__scheme known scheme] -// constant, -// [link url.ref.boost__urls__scheme `scheme::unknown`] -// if the scheme is not well-known, or -// [link url.ref.boost__urls__scheme `scheme::none`] -// if no scheme is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_scheme `remove_scheme`]] -// [ -// Remove the scheme if present. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_scheme `set_scheme`]] -// [ -// Set the scheme to a given string or -// [link url.ref.boost__urls__scheme known scheme] -// constant. -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Name|Description + +// Row 1, Column 1 +|`has_scheme` +// Row 1, Column 2 +| +// Row 1, Column 3 +|Return `true` if a scheme is present. +// Row 1, Column 4 +| + +// Row 2, Column 1 +|`scheme` +// Row 2, Column 2 +| +// Row 2, Column 3 +|Return the scheme as a string. +// Row 2, Column 4 +| + +// Row 3, Column 1 +|`scheme_id` +// Row 3, Column 2 +| +// Row 3, Column 3 +|Return the scheme as a + known scheme + constant, + `scheme::unknown` + if the scheme is not well-known, or + `scheme::none` + if no scheme is present. +// Row 3, Column 4 +| + +// Row 4, Column 1 +|`remove_scheme` +// Row 4, Column 2 +| +// Row 4, Column 3 +|Remove the scheme if present. +// Row 4, Column 4 +| + +// Row 5, Column 1 +|`set_scheme` +// Row 5, Column 2 +| +// Row 5, Column 3 +|Set the scheme to a given string or + known scheme + constant. 
+// Row 5, Column 4 +| + +|=== -// [table Userinfo Members [ -// [Name] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_userinfo `has_userinfo`]] -// [ -// Return `true` if a userinfo is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_userinfo `encoded_userinfo`]] -// [ -// Return the userinfo field as a percent-encoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.userinfo `userinfo`]] -// [ -// Return the userinfo field with percent-decoding applied. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_user `encoded_user`]] -// [ -// Return the user field as a percent-encoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.user `user`]] -// [ -// Return the user field with percent-decoding applied. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_password `has_password`]] -// [ -// Return `true` if a password is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_password `encoded_password`]] -// [ -// Return the password as a percent-encoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.password `password`]] -// [ -// Return the password with percent-decoding applied. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_user `set_user`]] -// [ -// Set the user field using a plain string. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_password `remove_password`]] -// [ -// Remove the password field if present. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_password `set_encoded_password`]] -// [ -// Set the password field using a percent-encoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_password `set_password`]] -// [ -// Set the password field using a plain string. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_userinfo `remove_userinfo`]] -// [ -// Remove the entire userinfo if present. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_userinfo `set_encoded_userinfo`]] -// [ -// Set the entire userinfo using a percent-encoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_userinfo `set_userinfo`]] -// [ -// Set the entire userinfo using a plain string. -// ] -// ]] + +[cols="a,a"] +|=== +// Headers +|Name|Description + +// Row 1, Column 1 +|`has_userinfo` +// Row 1, Column 2 +| +// Row 1, Column 3 +|Return `true` if a userinfo is present. +// Row 1, Column 4 +| + +// Row 2, Column 1 +|`encoded_userinfo` +// Row 2, Column 2 +| +// Row 2, Column 3 +|Return the userinfo field as a percent-encoded string. +// Row 2, Column 4 +| + +// Row 3, Column 1 +|`userinfo` +// Row 3, Column 2 +| +// Row 3, Column 3 +|Return the userinfo field with percent-decoding applied. +// Row 3, Column 4 +| + +// Row 4, Column 1 +|`encoded_user` +// Row 4, Column 2 +| +// Row 4, Column 3 +|Return the user field as a percent-encoded string. +// Row 4, Column 4 +| + +// Row 5, Column 1 +|`user` +// Row 5, Column 2 +| +// Row 5, Column 3 +|Return the user field with percent-decoding applied. +// Row 5, Column 4 +| + +// Row 6, Column 1 +|`has_password` +// Row 6, Column 2 +| +// Row 6, Column 3 +|Return `true` if a password is present. +// Row 6, Column 4 +| + +// Row 7, Column 1 +|`encoded_password` +// Row 7, Column 2 +| +// Row 7, Column 3 +|Return the password as a percent-encoded string. +// Row 7, Column 4 +| + +// Row 8, Column 1 +|`password` +// Row 8, Column 2 +| +// Row 8, Column 3 +|Return the password with percent-decoding applied. +// Row 8, Column 4 +| + +// Row 9, Column 1 +|`set_user` +// Row 9, Column 2 +| +// Row 9, Column 3 +|Set the user field using a plain string. +// Row 9, Column 4 +| + +// Row 10, Column 1 +|`remove_password` +// Row 10, Column 2 +| +// Row 10, Column 3 +|Remove the password field if present. 
+// Row 10, Column 4 +| + +// Row 11, Column 1 +|`set_encoded_password` +// Row 11, Column 2 +| +// Row 11, Column 3 +|Set the password field using a percent-encoded string. +// Row 11, Column 4 +| + +// Row 12, Column 1 +|`set_password` +// Row 12, Column 2 +| +// Row 12, Column 3 +|Set the password field using a plain string. +// Row 12, Column 4 +| + +// Row 13, Column 1 +|`remove_userinfo` +// Row 13, Column 2 +| +// Row 13, Column 3 +|Remove the entire userinfo if present. +// Row 13, Column 4 +| + +// Row 14, Column 1 +|`set_encoded_userinfo` +// Row 14, Column 2 +| +// Row 14, Column 3 +|Set the entire userinfo using a percent-encoded string. +// Row 14, Column 4 +| + +// Row 15, Column 1 +|`set_userinfo` +// Row 15, Column 2 +| +// Row 15, Column 3 +|Set the entire userinfo using a plain string. +// Row 15, Column 4 +| + +|=== + diff --git a/doc/modules/ROOT/pages/concepts/CharSet.adoc b/doc/modules/ROOT/pages/concepts/CharSet.adoc index 03a14fc2..404b0c43 100644 --- a/doc/modules/ROOT/pages/concepts/CharSet.adoc +++ b/doc/modules/ROOT/pages/concepts/CharSet.adoc @@ -8,19 +8,17 @@ // -// [section:charset CharSet] +[#charset] += CharSet -A __CharSet__ is a unary predicate which is invocable with -this equivalent signature: +A __CharSet__ is a unary predicate which is invocable with this equivalent signature: [source,cpp] ---- bool( char ch ) const noexcept; ---- - -The predicate returns `true` if `ch` is a member of the -set, or `false` otherwise. +The predicate returns `true` if `ch` is a member of the set, or `false` otherwise. == Related Identifiers @@ -37,74 +35,90 @@ In this table: * `c` is a value of type `char` * `first`, `last` are values of type `char const*` -// [table Valid expressions -// [[Expression] [Type] [Semantics, Pre/Post-conditions]] -// [ -// [`t(c)`] -// [`bool`] -// [ -// This function returns `true` if `c` is a member of -// the character set, otherwise it returns `false`. 
-// ] -// ][ -// [ -// ``` -// t.find_if(first,last) -// ``` -// ] -// [`char const*`] -// [ -// This optional member function examines the valid -// range of characters in `[first, last)` and returns -// a pointer to the first occurrence of a character -// which is in the set, or returns `last` if no such -// character. -// -// The implementation of -// [link url.ref.boost__urls__grammar__find_if `find_if`] -// calls this function if provided by the character -// set, allowing optimized or otherwise performant -// implementations to be developed. If this member -// function is not provided, a default implementation -// is used which calls `operator()`. -// ] -// ][ -// [ -// ``` -// t.find_if_not(first,last) -// ``` -// ] -// [`char const*`] -// [ -// This optional member function examines the valid -// range of characters in `[first, last)` and returns -// a pointer to the first occurrence of a character -// which is not in the set, or returns `last` if no -// such character. -// -// The implementation of -// [link url.ref.boost__urls__grammar__find_if_not `find_if_not`] -// calls this function if provided by the character -// set, allowing optimized or otherwise performant -// implementations to be developed. If this member -// function is not provided, a default implementation -// is used which calls `operator()`. -// ] -// ]] -// -// [heading Exemplar] -// -// For best results, it is suggested that all constructors and -// member functions for character sets be marked `constexpr`. 
-// -// [code_charset_1] -// -// [heading Models] -// -// * [link url.ref.boost__urls__grammar__alnum_chars `alnum_chars`] -// * [link url.ref.boost__urls__grammar__alpha_chars `alpha_chars`] -// * [link url.ref.boost__urls__grammar__digit_chars `digit_chars`] -// * [link url.ref.boost__urls__grammar__hexdig_chars `hexdig_chars`] -// * [link url.ref.boost__urls__grammar__lut_chars `lut_chars`] -// -// [endsect] \ No newline at end of file +[cols="a,a,a"] +|=== +// Headers +|Expression|Type|Semantics, Pre/Post-conditions + +// Row 1, Column 1 +|`t(c)` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|This function returns `true` if `c` is a member of +the character set, otherwise it returns `false`. + +// Row 2, Column 1 +| +[source,cpp] +---- +t.find_if(first,last) +---- + +// Row 2, Column 2 +|`char const*` +// Row 2, Column 3 +|This optional member function examines the valid range of characters in `[first, last)` and returns +a pointer to the first occurrence of a character +which is in the set, or returns `last` if no such +character. + +The implementation of `grammar::find_if` +calls this function if provided by the character +set, allowing optimized or otherwise performant +implementations to be developed. If this member +function is not provided, a default implementation +is used which calls `operator()`.
+|=== + +== Exemplar + +For best results, it is suggested that all constructors and +member functions for character sets be marked `constexpr`. + +// code_charset_1 +[source,cpp] +---- +struct CharSet +{ + bool operator()( char c ) const noexcept; + + // These are both optional. If either or both are left + // unspecified, a default implementation will be used. + // + char const* find_if( char const* first, char const* last ) const noexcept; + char const* find_if_not( char const* first, char const* last ) const noexcept; +}; +---- + +== Models + +* `grammar::alnum_chars` +* `grammar::alpha_chars` +* `grammar::digit_chars` +* `grammar::hexdig_chars` +* `grammar::lut_chars` + + diff --git a/doc/modules/ROOT/pages/concepts/Rule.adoc b/doc/modules/ROOT/pages/concepts/Rule.adoc index 701a29dc..aa49342b 100644 --- a/doc/modules/ROOT/pages/concepts/Rule.adoc +++ b/doc/modules/ROOT/pages/concepts/Rule.adoc @@ -8,7 +8,7 @@ // -== Rule += Rule A __Rule__ defines an algorithm used to match an input buffer of ASCII characters against a set of syntactical specifications. @@ -18,11 +18,11 @@ rules for productions typically found in RFC documents. Rules are not invoked directly; instead, rule variables are used with overloads of `parse` which provide a convenient, uniform front end. -=== Related Identifiers +== Related Identifiers `is_rule`, `parse`. -=== Requirements +== Requirements In this table: @@ -31,79 +31,84 @@ In this table: * `it` is an __lvalue__ with type `char const*` * `end` is a value of type `char const*` -// [table Valid expressions -// [[Expression] [Type] [Semantics, Pre/Post-conditions]] -// [ -// [ -// ``` -// T(t) -// ``` -// ] -// [] -// [ -// Copy construction of `T` throws nothing. 
-// -// `std::is_nothrow_copy_constructible::value == true` -// ] -// ][ -// [ -// ``` -// T::value_type -// ``` -// ] -// [] -// [ -// Values of this type are returned by the rule when the -// parse operation is successful -// ] -// ][ -// [ -// ``` -// t.parse(it,end) -// ``` -// ] -// [`result`] -// [ -// Attempt to parse the buffer of characters defined by -// the range `[it,end)`. Upon success, the return result -// holds an instance of the rule's value type, and -// the reference parameter `it` is modified to point -// to the first unconsumed character. Otherwise, upon -// failure the result holds an error. In this case -// the implementation defines if and how the reference -// parameter `it` is modified. -// ] -// ]] -// -// [heading Exemplar] -// -// For best results, it is suggested that all constructors for -// rules be marked `constexpr`. -// -// ``` -// struct Rule -// { -// struct value_type; -// -// constexpr Rule( Rule const& ) noexcept = default; -// -// auto parse( char const*& it, char const* end ) const -> result< value_type >; -// }; -// -// // Declare a variable of type Rule for notational convenience -// constexpr Rule rule{}; -// ``` -// -// [heading Models] -// -// * __dec_octet_rule__ -// * __delim_rule__ -// * __not_empty_rule__ -// * __optional_rule__ -// * __range_rule__ -// * __token_rule__ -// * __tuple_rule__ -// * __unsigned_rule__ -// * __variant_rule__ -// -// [endsect] \ No newline at end of file +[cols="a,a,a"] +|=== +// Headers +|Expression|Type|Semantics, Pre/Post-conditions + +// Row 1, Column 1 +|[source,cpp] +---- +T(t) +---- + +// Row 1, Column 2 +| - +// Row 1, Column 3 +|Copy construction of `T` throws nothing. 
+ +`std::is_nothrow_copy_constructible<T>::value == true` + +// Row 2, Column 1 +|[source,cpp] +---- +T::value_type +---- + +// Row 2, Column 2 +| - +// Row 2, Column 3 +|Values of this type are returned by the rule when the + parse operation is successful + +// Row 3, Column 1 +|[source,cpp] +---- +t.parse(it,end) +---- + +// Row 3, Column 2 +|`result<T::value_type>` +// Row 3, Column 3 +|Attempt to parse the buffer of characters defined by +the range `[it,end)`. Upon success, the return result +holds an instance of the rule's value type, and +the reference parameter `it` is modified to point +to the first unconsumed character. Otherwise, upon +failure the result holds an error. In this case +the implementation defines if and how the reference +parameter `it` is modified. + +|=== + +== Exemplar + +For best results, it is suggested that all constructors for +rules be marked `constexpr`. + +[source,cpp] +---- +struct Rule +{ + struct value_type; + + constexpr Rule( Rule const& ) noexcept = default; + + auto parse( char const*& it, char const* end ) const -> result< value_type >; +}; + +// Declare a variable of type Rule for notational convenience +constexpr Rule rule{}; +---- + +== Models + +* `grammar::dec_octet_rule` +* `grammar::delim_rule` +* `grammar::not_empty_rule` +* `grammar::optional_rule` +* `grammar::range_rule` +* `grammar::token_rule` +* `grammar::tuple_rule` +* `grammar::unsigned_rule` +* `grammar::variant_rule` diff --git a/doc/modules/ROOT/pages/concepts/StringToken.adoc b/doc/modules/ROOT/pages/concepts/StringToken.adoc index 027d3a93..0673b63f 100644 --- a/doc/modules/ROOT/pages/concepts/StringToken.adoc +++ b/doc/modules/ROOT/pages/concepts/StringToken.adoc @@ -8,7 +8,7 @@ // -== StringToken += StringToken A string token is an rvalue passed to a function template which customizes the return type of the function and also controls how @@ -18,7 +18,7 @@ function call in which it appears as a parameter.
A string token cannot be copied, moved, or assigned, and must be destroyed when the function returns or throws. -=== Requirements +== Requirements In this table: @@ -26,73 +26,81 @@ In this table: * `t` is an rvalue reference of type T * `n` is a value of type `std::size_t` -// [table Valid expressions -// [[Expression] [Result] [Semantics, Pre/Post-conditions]] -// [ -// [ -// ``` -// std::derived_from -// ``` -// ] -// [ -// ``` -// true -// ``` -// ] -// [ -// All string tokens must be publicly and -// unambiguously derived from -// [link url.ref.boost__urls__string_token__arg `string_token::arg`]. -// ] -// ][ -// [ -// ``` -// T::result_type -// ``` -// ] -// [] -// [ -// This type determines the return type of functions -// which accept a string token. -// ] -// ][ -// [ -// ``` -// t.prepare(n); -// ``` -// ] -// [ -// ``` -// char* -// ``` -// ] -// [ -// This function overrides the virtual function in the base. -// It must return a pointer to a character buffer of at least -// size `n`, otherwise throw an exception. -// ] -// ][ -// [ -// ``` -// t.result(); -// ``` -// ] -// [ -// ``` -// T::result_type -// ``` -// ] -// [ -// This function is invoked by the algorithm to receive the result -// from the string token. -// It is only invoked if `prepare` returned successfuly and the -// string token was not destroyed. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Expression|Result|Semantics, Pre/Post-conditions + +// Row 1, Column 1 +|[source,cpp] +---- +std::derived_from +---- + +// Row 1, Column 2 +|[source,cpp] +---- +true +---- + +// Row 1, Column 3 +|All string tokens must be publicly and +unambiguously derived from +`string_token::arg`. + +// Row 2, Column 1 +|[source,cpp] +---- +T::result_type +---- + +// Row 2, Column 2 +| +// Row 2, Column 3 +|This type determines the return type of functions +which accept a string token. 
+ +// Row 3, Column 1 +|[source,cpp] +---- +t.prepare(n); +---- + +// Row 3, Column 2 +|[source,cpp] +---- +char* +---- + +// Row 3, Column 3 +|This function overrides the virtual function in the base. +It must return a pointer to a character buffer of at least +size `n`, otherwise throw an exception. + +// Row 4, Column 1 +|[source,cpp] +---- +t.result(); +---- + +// Row 4, Column 2 +|[source,cpp] +---- +T::result_type +---- + +// Row 4, Column 3 +|This function is invoked by the algorithm to receive the result +from the string token. +It is only invoked if `prepare` returned successfully and the +string token was not destroyed. + +|=== -=== Algorithm Requirements + +== Algorithm Requirements When an algorithm accepts a string token, it must meet these requirements: @@ -105,7 +113,7 @@ String tokens cannot be reused. -=== Exemplars +== Exemplars String token prototype: @@ -166,9 +174,9 @@ algorithm( StringToken&& token = {} ) -> Models -* `append_to` -* `assign_to` -* `preserve_size` -* `append_to` +* `string_token::return_string` +* `string_token::assign_to` +* `string_token::preserve_size` +* `string_token::append_to` diff --git a/doc/modules/ROOT/pages/concepts/index.adoc b/doc/modules/ROOT/pages/concepts/index.adoc index 7fb3031e..fbfe8b67 100644 --- a/doc/modules/ROOT/pages/concepts/index.adoc +++ b/doc/modules/ROOT/pages/concepts/index.adoc @@ -8,12 +8,12 @@ // -== Concepts += Concepts This section describes all of the concepts defined by the library. -// [include 5.1.CharSet.qbk] -// [include 5.2.Rule.qbk] -// [include 5.3.StringToken.qbk] + + + diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc deleted file mode 100644 index f91d0b41..00000000 --- a/doc/modules/ROOT/pages/examples.adoc +++ /dev/null @@ -1,144 +0,0 @@ -// -// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) -// -// Distributed under the Boost Software License, Version 1.0.
(See accompanying -// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) -// -// Official repository: https://github.com/boostorg/url -// - - - - -== Examples - -=== QR Code - -A QR code is a machine-readable two-dimensional barcode. They might contain data -for a identifier or a URL to a website. - -This example shows how to construct and modify URLs to consume a third party API to -generate QR Codes. - -[source,cpp] ----- -// example_qrcode ----- - - - -=== Finicky - -This example shows how to classify URLs according to a set of rules. It is -inspired by https://github.com/johnste/finicky[Finicky,window=blank_] application. - -The URLs are classified and redirected to a browser according to their -category. See the example `config.json` file. - -[source,cpp] ----- -// example_finicky ----- - - - -=== mailto URLs - -`mailto` is a URL scheme for email addresses. `mailto` URL are used on websites -to allow users to send an email to a specific address directly from an HTML document. - -This example parses a mailto URL into a new view type and prints its components to -standard output. - -[source,cpp] ----- -// example_mailto ----- - - - -=== Magnet Link - -`magnet` is a URL scheme for identifying files by their content. These files are -usually identified by cryptographic hash value. - -Magnet links are useful in peer-to-peer file sharing networks because they allow -resources to be referred to without the need for a continuously available host.. - -This example parses a magnet link into a new view type and prints its components to -standard output. - -[source,cpp] ----- -// example_magnet ----- - - - -=== File Router - -This example defines a router that associates URL paths to a directory in the filesystem. If -the specified route matches and the file exists, the example prints its contents to standard output. - -[source,cpp] ----- -// example_file_router ----- - - - -=== Router - -This example defines a router for URL paths. 
If the specified route matches one of the existing -routes, the example executes the underlying callback function. - -[source,cpp] ----- -// example_router ----- - - - -=== Sanitizing URLs - -This example parses a non-strict or invalid URL -into path components according to its delimiters. -This pattern can be adapted to the requirements of other -applications. - -Once the non-strict components are determined, a new URL is -created and its parts are set with the `set_encoded_X` -functions, which will encode any invalid chars accordingly. - -This sort of transformation is useful in applications that are -extremely loose in what kinds of URLs they accept, such as -browsers. The sanitized URL can later be used for machine-to-machine -communication. - -Using non-strict URLs directly is a security concern in -machine-to-machine communication, is ambiguous, and also -involve an extra cost for the transformations. - -Different transformations are required by different applications to -construct a valid URL appropriate for machine-to-machine communication. -For instance, if an invalid relative reference includes something that -looks like a host in the first path segment, browsers usually interpret -that as the host with an implicit "https" scheme. Other applications -also have other implicit schemes. - -The example also identifies whether the input url is already valid. -It includes diagnostics that can be used to help the user determine -if a URL is invalid and why it's invalid. - -Once all transformations are applied, the result is a URL -appropriate for machine-to-machine communication. 
- -[source,cpp] ----- -// example_sanitize_url ----- - - - - - diff --git a/doc/modules/ROOT/pages/examples/file-router.adoc b/doc/modules/ROOT/pages/examples/file-router.adoc new file mode 100644 index 00000000..d76075f9 --- /dev/null +++ b/doc/modules/ROOT/pages/examples/file-router.adoc @@ -0,0 +1,221 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += File Router + +This example defines a router that associates URL paths to a directory in the filesystem. If +the specified route matches and the file exists, the example prints its contents to standard output. + +// example_file_router +[source,cpp] +---- + +/* + This example defines a route for a URL path. + If the specified route matches and the file + exists, the example prints its contents to + standard output. +*/ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace urls = boost::urls; +namespace fs = boost::filesystem; +namespace core = boost::core; +using string_view = boost::core::string_view; + +/** Check if a target matches a prefix + + This function checks if the first segments + of the target match the corresponding prefix + segments. 
+ + @param target Target segments + @param prefix Prefix segments + @return True if target matches prefix + */ +bool match_prefix( + urls::segments_view target, + urls::segments_view prefix) +{ + // Trivially reject target that cannot + // contain the prefix + if (target.size() < prefix.size()) + return false; + + // Match the prefix segments + auto it0 = target.begin(); + auto end0 = target.end(); + auto it1 = prefix.begin(); + auto end1 = prefix.end(); + while ( + it0 != end0 && + it1 != end1 && + *it0 == *it1) + { + ++it0; + ++it1; + } + return it1 == end1; +} + +/** A static route representing files in a directory + + A route is a URL logical prefix representing + static files in the specified root directory. + + The `match` function returns the corresponding + file for a given URL path. + */ +class route +{ +public: + /// Constructor + route(core::string_view prefix, fs::path root) + : prefix_(urls::parse_uri_reference(prefix).value()) + , root_(std::move(root)) + {} + + /// Constructor + route(urls::url prefix, fs::path root) + : prefix_(std::move(prefix)) + , root_(std::move(root)) + {} + + /** Match target URL path with a file + + This function attempts to match the target + URL path with the route prefix. + + If the prefix matches, the target is + considered to represent a file in the root + directory. When that happens, the target + prefix is consumed and other segments are + appended to the root path. + + The complete file path represented by the + target is returned as the output parameter + `result`. 
+ + @param target Target URL path + @param result An out-parameter holding the resulting path + @return `true` if target matches the directory + */ + bool match( + urls::url_view target, + fs::path& result) + { + if (match_prefix( + target.segments(), + static_cast(prefix_).segments())) + { + result = root_; + auto segs = target.segments(); + auto it = segs.begin(); + auto end = segs.end(); + std::advance(it, prefix_.segments().size()); + while (it != end) + { + auto seg = *it; + result.append(seg.begin(), seg.end()); + ++it; + } + return true; + } + return false; + } + +private: + urls::url prefix_; + fs::path root_; +}; + +int +main(int argc, char **argv) +{ + namespace urls = boost::urls; + namespace fs = boost::filesystem; + + // Check command line arguments. + if (argc != 4) + { + fs::path exec = argv[0]; + exec = exec.filename(); + std::cerr + << "Usage: " << exec + << " \n" + "target: path to make a request\n" + "prefix: url prefix\n" + "doc_root: dir to look for files\n"; + return EXIT_FAILURE; + } + + try { + urls::url target = + urls::parse_uri_reference(argv[1]).value(); + target.normalize_path(); + + std::string prefix = argv[2]; + fs::path root = argv[2]; + + if (!fs::is_directory(root)) + { + std::cerr + << "Error: " << root + << " is not a directory\n"; + return EXIT_FAILURE; + } + + // Create route + route r(prefix, root); + + // Check if target matches a file + fs::path result; + if (r.match(target, result)) + { + fs::ifstream f(result); + std::string l; + while (std::getline(f, l)) + std::cout << l << '\n'; + f.close(); + } + else + { + std::cout + << "No " << target << " in prefix " + << prefix << std::endl; + } + return EXIT_SUCCESS; + } + catch (std::exception &e) + { + std::cerr << e.what() << "\n"; + return EXIT_FAILURE; + } +} +---- + + + diff --git a/doc/modules/ROOT/pages/examples/finicky.adoc b/doc/modules/ROOT/pages/examples/finicky.adoc new file mode 100644 index 00000000..cbd215d5 --- /dev/null +++ 
b/doc/modules/ROOT/pages/examples/finicky.adoc @@ -0,0 +1,413 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += Finicky + +This example shows how to classify URLs according to a set of rules. It is +inspired by https://github.com/johnste/finicky[Finicky,window=blank_] application. + +The URLs are classified and redirected to a browser according to their +category. See the example `config.json` file. + +// example_finicky +[source,cpp] +---- + +/* + This example shows how to classify URLs + according to a set of rules. This example is + inspired by Finicky. The URLs are classified + and redirected to a browser according to their + category. See the example config.json file. + https://github.com/johnste/finicky +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace urls = boost::urls; +namespace json = boost::json; +namespace core = boost::core; + +json::value +read_json( std::istream& is, json::error_code& ec ) +{ + json::parse_options opt; + opt.allow_comments = true; + json::stream_parser p(json::storage_ptr(), opt); + std::string line; + while( std::getline( is, line ) ) + { + p.write( line, ec ); + if( ec ) + return nullptr; + } + p.finish( ec ); + if( ec ) + return nullptr; + return p.release(); +} + +bool +glob_match( + core::string_view pattern, + core::string_view str) +{ + // regex: the pattern (not the input) is delimited by slashes, e.g. /ab+c/ + if (pattern.size() >= 2 && pattern.starts_with("/") && + pattern.ends_with("/")) + { + const boost::regex pr(pattern.begin() + 1, pattern.end() - 1); + return boost::regex_match(std::string(str), pr); + } + + // literal + if (!pattern.contains('*')) + { + return pattern == str; + } + + // glob: "*" matches within a path segment, "**" (or longer) matches anything + std::string p = pattern; + std::size_t i = p.find('*'); + while (i != std::string::npos) + { + auto e = 
std::min(p.find_first_not_of('*', i), p.size()); + std::size_t n = e - i; // length of the run of consecutive stars + if (n == 1) + { + p.replace(i, n, "[^/]*"); // std::string::replace takes a count, not an end index + i += 5; + } + else + { + p.replace(i, n, ".*"); // std::string::replace takes a count, not an end index + i += 2; + } + i = p.find('*', i); + } + const boost::regex pr(p); + return boost::regex_match(std::string(str), pr); +} + +bool +url_match( + json::value& mv, + urls::url const& u) +{ + if (mv.is_string()) + { + json::string& p = mv.as_string(); + return glob_match(p, u.buffer()); // (pattern, str): the config value is the pattern + } + else if (mv.is_array()) + { + json::array& m = mv.as_array(); + for (auto& mi: m) + { + if (!mi.is_string()) + throw std::invalid_argument( + "handle match is not a string"); + if (glob_match(mi.as_string(), u.buffer())) + return true; + } + } + else if (mv.is_object()) + { + json::object& m = mv.as_object(); + std::pair<core::string_view, core::string_view> + field_values[] = { + {"protocol", u.scheme()}, + {"authority", u.encoded_authority()}, + {"username", u.encoded_user()}, + {"user", u.encoded_user()}, + {"password", u.encoded_password()}, + {"userinfo", u.encoded_userinfo()}, + {"host", u.encoded_host()}, + {"port", u.port()}, + {"path", u.encoded_path()}, + {"pathname", u.encoded_path()}, + {"query", u.encoded_query()}, + {"search", u.encoded_query()}, + {"fragment", u.encoded_fragment()}, + {"hash", u.encoded_fragment()}, + }; + for (auto& p: field_values) + { + auto it = m.find(p.first); + if (it != m.end()) + { + if (!it->value().is_string()) + throw std::invalid_argument( + "match fields should be a strings"); + if (glob_match(it->value().as_string(), p.second)) // configured pattern vs. URL component + return true; + } + } + } + return false; +} + +#define CHECK(c, msg) \ + if (!(c)) \ + { \ + std::cerr << msg << "\n"; \ + return EXIT_FAILURE; \ + } + +int main(int argc, char** argv) +{ + if (argc < 3) { + std::cout << argv[0] << "\n"; + std::cout << "Usage: finicky \n" + "options:\n" + " : Configuration file\n" + " : The url to open\n" + "examples:\n" + " finicky config.json \"http://www.example.com\"\n"; + return EXIT_FAILURE; + } + + // Parse url + boost::system::result<urls::url_view> ru = 
urls::parse_uri(argv[2]); + CHECK(ru, "Invalid URL"); + urls::url u = *ru; + + // Open config file + std::fstream fin(argv[1]); + CHECK(fin.good(), "Cannot open configuration file"); + json::error_code ec; + json::value c = read_json(fin, ec); + CHECK(!ec.failed(), "Cannot parse configuration file"); + CHECK(c.is_object(), "Configuration file is not an object"); + json::object& o = c.as_object(); + + // Set initial browser + auto bit = o.find("defaultBrowser"); + CHECK( + bit != o.end(), + "Configuration file has no defaultBrowser"); + CHECK( + bit->value().is_string(), + "defaultBrowser should be a string"); + json::string& browser = bit->value().as_string(); + + // Apply rewrites to the input string + auto rsit = o.find("rewrite"); + if (rsit != o.end()) + { + CHECK( + rsit->value().is_array(), + "rewrite rules should be an array"); + auto& rs = rsit->value().as_array(); + for (auto& rv: rs) + { + CHECK( + rv.is_object(), + "individual rewrite rule should be an object"); + json::object& r = rv.as_object(); + + // Look for match + auto mit = r.find("match"); + CHECK( + mit != r.end(), + "rewrite rule should have a match field"); + CHECK( + mit->value().is_object() || mit->value().is_string(), + "rewrite match field must be an object or a string"); + if (!url_match(mit->value(), u)) + continue; + + // Apply replacement rule + auto uit = r.find("url"); + CHECK( + uit != r.end(), + "rewrite rule should have a url field"); + CHECK( + uit->value().is_object() || + uit->value().is_string(), + "url field must be an object or string"); + + if (uit->value().is_string()) + { + json::string& uo = uit->value().as_string(); + auto ru1 = urls::parse_uri(uo); + CHECK(ru1, "url " << uo.c_str() << " is invalid"); + u = *ru1; // replace the URL with the parsed rewrite target, not the original input + } + else + { + json::object& uo = uit->value().as_object(); + auto it = uo.find("protocol"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "protocol field should be a string"); + u.set_scheme(it->value().as_string()); + } + + it = uo.find("authority"); + 
if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "authority field should be a string"); + u.set_encoded_authority( + it->value().as_string().subview()); + } + + it = uo.find("username"); + if (it == uo.end()) + it = uo.find("user"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "username field should be a string"); + u.set_encoded_user( + it->value().as_string().subview()); + } + + it = uo.find("password"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "password field should be a string"); + u.set_encoded_password( + it->value().as_string().subview()); + } + + it = uo.find("userinfo"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "userinfo field should be a string"); + u.set_encoded_userinfo( + it->value().as_string().subview()); + } + + it = uo.find("host"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "host field should be a string"); + u.set_encoded_host( + it->value().as_string().subview()); + } + + it = uo.find("port"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "port field should be a string"); + u.set_port( + it->value().as_string().subview()); + } + + it = uo.find("path"); + if (it == uo.end()) + it = uo.find("pathname"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "path field should be a string"); + u.set_encoded_path( + it->value().as_string().subview()); + } + + it = uo.find("query"); + if (it == uo.end()) + it = uo.find("search"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "query field should be a string"); + u.set_encoded_query( + it->value().as_string().subview()); + } + + it = uo.find("fragment"); + if (it == uo.end()) + it = uo.find("hash"); + if (it != uo.end()) + { + CHECK( + it->value().is_string(), + "fragment field should be a string"); + u.set_encoded_fragment( + it->value().as_string().subview()); + } + } + } + } + + // Determine which browser should handle the url + auto hsit = 
o.find("handlers"); + if (hsit != o.end()) + { + CHECK( + hsit->value().is_array(), + "handler rules should be an array"); + auto& hs = hsit->value().as_array(); + for (auto& hv: hs) + { + CHECK( + hv.is_object(), + "individual handlers should be an object"); + json::object& h = hv.as_object(); + + auto mit = h.find("match"); + CHECK( + mit != h.end(), + "handle rule should have a match field"); + CHECK( + mit->value().is_string() || mit->value().is_array(), + "handle match field must be an array or a string"); + + auto hbit = h.find("browser"); + CHECK( + hbit != h.end(), + "handle rule should have a browser field"); + CHECK( + hbit->value().is_string(), + "browser field is not a string"); + + // Look for match and change browser + if (url_match(mit->value(), u)) + { + browser = hbit->value().as_string().subview(); + break; + } + } + } + + // Print command finicky would run + std::cout << "\"" << browser.c_str() << "\" " << u << '\n'; + + return EXIT_SUCCESS; +} +---- + + diff --git a/doc/modules/ROOT/pages/examples/index.adoc b/doc/modules/ROOT/pages/examples/index.adoc new file mode 100644 index 00000000..22446787 --- /dev/null +++ b/doc/modules/ROOT/pages/examples/index.adoc @@ -0,0 +1,24 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += Examples + + + + + + + + + + + diff --git a/doc/modules/ROOT/pages/examples/magnet-link.adoc b/doc/modules/ROOT/pages/examples/magnet-link.adoc new file mode 100644 index 00000000..613046e1 --- /dev/null +++ b/doc/modules/ROOT/pages/examples/magnet-link.adoc @@ -0,0 +1,733 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += Magnet Link + +`magnet` is a URL scheme for identifying files by their content. These files are +usually identified by a cryptographic hash value. + +Magnet links are useful in peer-to-peer file sharing networks because they allow +resources to be referred to without the need for a continuously available host. + +This example parses a magnet link into a new view type and prints its components to +standard output. + +// example_magnet +[source,cpp] +---- + +/* + This example parses a magnet link into a new + view type and prints its components to + standard output. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "filter_view.hpp" +#include + +namespace urls = boost::urls; +namespace core = boost::core; + +/** Callable to identify a magnet "exact topic" + + This callable evaluates if a query parameter + represents a magnet "exact topic". + + This callable is used as a filter for + the topics_view. + */ +struct is_exact_topic +{ + bool + operator()(urls::param_view p); +}; + +/** Callable to identify a magnet url parameter + + This callable evaluates if a query parameter + has a given key and a url as its value. + + These urls are percent-encoded twice, + which means we need to decode it once + before attempting to parse it. + + This callable is used as a filter for + the keys_view. + */ +class is_url_with_key +{ + core::string_view k_; +public: + is_url_with_key( + core::string_view key) + : k_(key) {} + + bool + operator()(urls::param_view p); +}; + +/** Callable to convert param values to urls + + This callable converts the value of a + query parameter into a urls::url. + + This callable is used as a transform + function for the topics_view. 
+ */ +struct param_view_to_url +{ + urls::url + operator()(urls::param_view p); +}; + +/** Callable to convert param values to std::string + + This callable converts the value of a + query parameter into a std::string. + + This callable is used as a transform + function for the keys_view. + */ +struct to_decoded_value +{ + std::string + operator()(urls::param_view p) + { + return p.value; + } +}; + +/** Callable to convert param values to info_hashes + + This callable converts the value of a + query parameter into a core::string_view with + its infohash. + + The infohash hash is a parameter of an + exact topic field in the magnet link. + + This callable is used as a transform + function for the info_hashes_view. + */ +struct param_view_to_infohash +{ + core::string_view + operator()(urls::param_view p); +}; + +/** Callable to convert param values to protocols + + This callable converts the value of a + query parameter into a core::string_view with + its protocol. + + The protocol is a parameter of an exact + topic field in the magnet link. + + This callable is used as a transform + function for the protocols_view. + */ +struct to_protocol +{ + core::string_view + operator()(urls::param_view p); +}; + +struct magnet_link_rule_t; + +/** A new url type for magnet links + + This class represents a reference to a + magnet link. + + Unlike a urls::url_view, which only represents the + general syntax of urls, a magnet_link_view + represents a reference to fields that are + relevant to magnet links, while ignoring + elements of the general syntax + that are not relevant to the scheme. + + This allows us to use the general syntax + parsers to create a representation that + is more appropriate for the specified scheme + syntax. 
+ + @par Specification + @li DHT Protocol + @li Extension for Peers to Send Metadata Files + @li Magnet URI extension + @li Magnet URI scheme + + @par References + @li magnet-uri + + */ +class magnet_link_view +{ + urls::url_view u_; + +public: + /// A view of all exact topics in the magnet_link + using topics_view = + filter_view< + urls::params_view, + urls::url, + is_exact_topic, + param_view_to_url>; + + /// A view of all info_hashes in the magnet_link + using info_hashes_view = + filter_view< + urls::params_view, + std::string, + is_exact_topic, + param_view_to_infohash>; + + /// A view of all protocols in the magnet_link + using protocols_view = + filter_view< + urls::params_view, + std::string, + is_exact_topic, + to_protocol>; + + /** A view of all urls with the specified key in the magnet_link + + A number of fields in a magnet link refer + to a list of urls with the same query + parameter keys. + */ + using keys_view = + filter_view< + urls::params_view, + std::string, + is_url_with_key, + to_decoded_value>; + + /** URNs to the file or files hashes + + An exact topic is the main field of a + magnet link. A magnet link must contain + one or more exact topics with the query + key "xt" or ["xt.1", "xt.2", ...]. + + The value of each exact topic is a URN + representing the file hash and the protocol + to access the file. + + @return A view of all exact topic URNs in the link + */ + topics_view + exact_topics() const noexcept; + + /** Info hash of the file or files + + @return A view of all info hashes in exact topics + */ + info_hashes_view + info_hashes() const noexcept; + + /** Protocol of the exact topics + + @return A view of all protocols in exact topics + */ + protocols_view + protocols() const noexcept; + + /** Return view of address trackers + + A tracker URL is used to obtain resources + for BitTorrent downloads. 
+ + @return A view of all address trackers in the link + */ + keys_view + address_trackers() const; + + /** Return view of exact sources + + An exact source URL is a direct download + link to the file. + + @return A view of all exact sources + */ + keys_view + exact_sources() const; + + /** Return view of acceptable sources + + An acceptable source URL is a direct + download link to the file that can be + used as a fallback for exact sources. + + @return A view of all acceptable sources + */ + keys_view + acceptable_sources() const; + + /** Return keyword topic + + The keyword topic is the search keywords + to use in P2P networks. + + @par Example + kt=martin+luther+king+mp3 + + @return Keyword topic + */ + boost::optional + keyword_topic() const noexcept; + + /** Return manifest topics + + This function returns a link to the + metafile that contains a list of magneto. + + @par Specification + @li MAGnet MAnifest + + @return A view of manifest topics + */ + keys_view + manifest_topics() const; + + /** Return display name + + This function returns a filename to + display to the user. This field is + only used for convenience. + + @par Specification + @li MAGnet MAnifest + + @return Display name + */ + boost::optional + display_name() const noexcept; + + /** Return web seed + + The web seed represents the payload data + served over HTTP(S). + + @return Web seed + */ + keys_view + web_seed() const; + + /** Return extra supplement parameter + + This function returns informal options + and parameters of the magnet link. + + Query parameters whose keys have the + prefix "x." are used in magnet links + for extra parameters. These names + are guaranteed to never be standardized. 
+ + @par Example + x.parameter_name=parameter_data + + @return Web seed + */ + boost::optional + param(core::string_view key) const noexcept; + + friend + std::ostream& + operator<<(std::ostream& os, magnet_link_view m) + { + return os << m.u_; + } + +private: + // get a query parameter as a urls::pct_string_view + boost::optional + encoded_param(core::string_view key) const noexcept; + + // get a query parameter as a urls::url_view + boost::optional + url_param(core::string_view key) const noexcept; + + friend magnet_link_rule_t; +}; + +bool +is_exact_topic:: +operator()(urls::param_view p) +{ + // These comparisons use the lazy + // operator== for urls::pct_string_view + // For instance, the comparison also works + // if the underlying key is "%78%74"/ + if (p.key == "xt") + return true; + return + p.key.size() > 3 && + *std::next(p.key.begin(), 0) == 'x' && + *std::next(p.key.begin(), 1) == 't' && + *std::next(p.key.begin(), 2) == '.' && + std::all_of( + std::next(p.key.begin(), 3), + p.key.end(), + urls::grammar::digit_chars); +} + +bool +is_url_with_key:: +operator()(urls::param_view p) +{ + if (p.key != k_) + return false; + boost::system::error_code ec; + std::string buf( + p.value.begin(), p.value.end()); + if (ec.failed()) + return false; + boost::system::result r = + urls::parse_uri(buf); + return r.has_value(); +} + +urls::url +param_view_to_url:: +operator()(urls::param_view p) +{ + // `param_view_to_url` is used in topics_view, + // where the URL is not + // percent-encoded twice. + // Thus, we can already parse the + // encoded value. 
+ auto ur = + urls::parse_uri(p.value); + BOOST_ASSERT(ur); + urls::url u = *ur; + return u; +} + +core::string_view +param_view_to_infohash:: +operator()(urls::param_view p) +{ + urls::url_view topic = + urls::parse_uri(p.value).value(); + core::string_view t = topic.encoded_path(); + std::size_t pos = t.find_last_of(':'); + if (pos != core::string_view::npos) + return t.substr(pos + 1); + return t; +} + +core::string_view +to_protocol:: +operator()(urls::param_view p) +{ + urls::url_view topic = + urls::parse_uri(p.value).value(); + core::string_view t = topic.encoded_path(); + std::size_t pos = t.find_last_of(':'); + return t.substr(0, pos); +} + +auto +magnet_link_view::exact_topics() const noexcept + -> topics_view +{ + return {u_.params()}; +} + +auto +magnet_link_view::info_hashes() const noexcept + -> info_hashes_view +{ + return {u_.params()}; +} + +auto +magnet_link_view::protocols() const noexcept + -> protocols_view +{ + return {u_.params()}; +} + +auto +magnet_link_view::address_trackers() const + -> keys_view +{ + return { + u_.params(), + is_url_with_key{"tr"}}; +} + +auto +magnet_link_view::exact_sources() const + -> keys_view +{ + return { + u_.params(), + is_url_with_key{"xs"}}; +} + +auto +magnet_link_view::acceptable_sources() const + -> keys_view +{ + return { + u_.params(), + is_url_with_key{"as"}}; +} + +boost::optional +magnet_link_view::keyword_topic() const noexcept +{ + boost::optional o = + encoded_param("kt"); + if (o) + return o->decode(); + return boost::none; +} + +auto +magnet_link_view::manifest_topics() const + -> keys_view +{ + return { + u_.params(), + is_url_with_key{"mt"}}; +} + +boost::optional +magnet_link_view::display_name() const noexcept +{ + return encoded_param("dn"); +} + +auto +magnet_link_view::web_seed() const + -> keys_view +{ + return { + u_.params(), + is_url_with_key{"ws"}}; +} + +boost::optional +magnet_link_view::param(core::string_view key) const noexcept +{ + urls::params_view ps = u_.params(); + auto it = 
ps.begin(); + auto end = ps.end(); + while (it != end) + { + urls::param_view p = *it; + if (p.key.size() < 2) + { + ++it; + continue; + } + auto first = p.key.begin(); + auto mid = std::next(p.key.begin(), 2); + auto last = p.key.end(); + urls::pct_string_view prefix( + core::string_view(first, mid)); + urls::pct_string_view suffix( + core::string_view(mid, last)); + if (prefix == "x." && + suffix == key && + p.has_value) + return urls::pct_string_view(p.value); + ++it; + } + return boost::none; +} + +boost::optional +magnet_link_view::encoded_param(core::string_view key) const noexcept +{ + urls::params_encoded_view ps = u_.encoded_params(); + auto it = ps.find(key); + if (it != ps.end() && (*it).has_value) + return urls::pct_string_view((*it).value); + return boost::none; +} + +boost::optional +magnet_link_view::url_param(core::string_view key) const noexcept +{ + urls::params_encoded_view ps = u_.encoded_params(); + auto it = ps.find(key); + if (it != ps.end() && (*it).has_value) + { + boost::system::result r = + urls::parse_uri((*it).value); + if (r) + return *r; + } + return boost::none; +} + +/** Rule to match a magnet link +*/ +struct magnet_link_rule_t +{ + /// Value type returned by the rule + using value_type = magnet_link_view; + + /// Parse a sequence of characters into a magnet_link_view + boost::system::result< value_type > + parse( char const*& it, char const* end ) const noexcept; +}; + +auto +magnet_link_rule_t::parse( + char const*& it, + char const* end ) const noexcept + -> boost::system::result< value_type > +{ + // 1) Parse url with the general uri syntax + boost::system::result r = + urls::grammar::parse(it, end, urls::absolute_uri_rule); + if(!r) + return urls::grammar::error::invalid; + magnet_link_view m; + m.u_ = *r; + + // 2) Check if exact topics are valid urls + // and that we have at least one. This is the + // only mandatory field in magnet links. 
+ auto ps = m.u_.params(); + auto pit = ps.begin(); + auto pend = ps.end(); + pit = std::find_if(pit, pend, is_exact_topic{}); + if (pit == pend) + { + // no exact topic in the magnet link + return urls::grammar::error::invalid; + } + + // all topics should parse as valid urls + if (!std::all_of(pit, pend, []( + urls::param_view p) + { + if (!is_exact_topic{}(p)) + return true; + boost::system::result u = + urls::parse_uri(p.value); + return u.has_value(); + })) + return urls::grammar::error::invalid; + + // all other fields are optional + // magnet link is OK + return m; +} + +constexpr magnet_link_rule_t magnet_link_rule{}; + +/** Return a parsed magnet link from a string, or error. + + This is a more convenient user-facing function + to parse magnet links. +*/ +boost::system::result< magnet_link_view > +parse_magnet_link( core::string_view s ) noexcept +{ + return urls::grammar::parse(s, magnet_link_rule); +} + +int main(int argc, char** argv) +{ + // This example shows how to use custom parsing + // to process alternate URI schemes, in this + // case "magnet" + if (argc != 2) { + std::cout << argv[0] << "\n"; + std::cout << "magnet \n" + "example: magnet magnet:?xt=urn:btih:d2474e86c95b19b8bcfdb92bc12c9d44667cfa36" + "&dn=Leaves+of+Grass+by+Walt+Whitman.epub" + "&tr=udp%3A%2F%2Ftracker.example4.com%3A80" + "&tr=udp%3A%2F%2Ftracker.example5.com%3A80" + "&tr=udp%3A%2F%2Ftracker.example3.com%3A6969" + "&tr=udp%3A%2F%2Ftracker.example2.com%3A80" + "&tr=udp%3A%2F%2Ftracker.example1.com%3A1337\n"; + return EXIT_FAILURE; + } + + boost::system::result r = + parse_magnet_link(argv[1]); + if (!r) + return EXIT_FAILURE; + + magnet_link_view m = *r; + std::cout << "link: " << m << "\n"; + + auto xt = m.exact_topics(); + for (auto h : xt) + std::cout << "topic: " << h << "\n"; + + auto hs = m.info_hashes(); + for (auto h : hs) + std::cout << "hash: " << h << "\n"; + + auto ps = m.protocols(); + for (auto p : ps) + std::cout << "protocol: " << p << "\n"; + + auto tr = 
m.address_trackers(); + for (auto h : tr) + std::cout << "tracker: " << h << "\n"; + + auto xs = m.exact_sources(); + for (auto x : xs) + std::cout << "exact source: " << x << "\n"; + + auto as = m.acceptable_sources(); + for (auto a : as) + std::cout << "topic: " << a << "\n"; + + auto mt = m.manifest_topics(); + for (auto a : mt) + std::cout << "manifest topic: " << a << "\n"; + + auto ws = m.web_seed(); + for (auto a : ws) + std::cout << "web seed: " << a << "\n"; + + auto kt = m.keyword_topic(); + if (kt) + std::cout << "keyword topic: " << *kt << "\n"; + + auto dn = m.display_name(); + if (dn) + std::cout << "display name: " << *dn << "\n"; + + return EXIT_SUCCESS; +} +---- + + diff --git a/doc/modules/ROOT/pages/examples/mailto.adoc b/doc/modules/ROOT/pages/examples/mailto.adoc new file mode 100644 index 00000000..016fb44c --- /dev/null +++ b/doc/modules/ROOT/pages/examples/mailto.adoc @@ -0,0 +1,428 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += mailto URLs + +`mailto` is a URL scheme for email addresses. `mailto` URL are used on websites +to allow users to send an email to a specific address directly from an HTML document. + +This example parses a mailto URL into a new view type and prints its components to +standard output. + +// example_mailto +[source,cpp] +---- + +/* + This example parses a mailto URL into a new + view type and prints its components to + standard output. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "rfc.hpp" + +namespace urls = boost::urls; + +// fwd-declaration for mailto_view +struct mailto_rule_t; + +/// A new url type for mailto URLs +/** + This class represents a URI with the mailto + scheme. 
+ + Unlike a urls::url_view, which only represents + the general syntax of urls, a mailto_view + represents a reference to fields that are + relevant to mailto URLs, while ignoring + elements of the general syntax + that are not relevant to the scheme. + + This allows us to use the general syntax + parsers to create a representation that + is more appropriate for the specified scheme + syntax. + + @par Specification + @li The 'mailto' URI Scheme + @li RFC Errata Report + + @par References + @li mailto (Wikipedia) + + */ +class mailto_view +{ + urls::url_view u_; + +public: + /// Return the specified email address in the URL + /** + A mailto URL might contain multiple email + addresses separated by commas. + + The first addresses are represented in + the path. Other addresses are in + any query parameter whose key is "to". + + @param i Address index + + @return The specified address + */ + std::string + address(std::size_t i = 0) const; + + /// @copydoc address() + urls::pct_string_view + encoded_address(std::size_t i = 0) const noexcept; + + /// Return number of email addresses in the URL + std::size_t + size() const noexcept; + + /// Return the specified cc email address in the URL + /** + A mailto URL might contain multiple cc + email addresses separated by commas. + + Addresses can be represented in any query + parameter whose key is "cc". 
+ + @param i Address index + + @return The specified cc address + */ + std::string + cc(std::size_t i) const; + + /// @copydoc cc() + urls::pct_string_view + encoded_cc(std::size_t i) const noexcept; + + /// Return number of "cc" email addresses in the URL + std::size_t + size_cc() const noexcept; + + /// Return email message subject + std::string + subject() const; + + /// @copydoc subject() + urls::pct_string_view + encoded_subject() const noexcept; + + /// Return email message body + std::string + body() const; + + /// @copydoc body() + urls::pct_string_view + encoded_body() const noexcept; + + friend + std::ostream& + operator<<(std::ostream& os, mailto_view m) + { + return os << m.u_; + } + +private: + // Count number of addresses in a string + static + std::size_t + addr_in_str(boost::core::string_view s); + + // Get the ith address from a string + static + boost::optional + get_nth_address(boost::core::string_view to, std::size_t &i) noexcept; + + // Get param value or empty otherwise + urls::pct_string_view + param_or_empty(urls::pct_string_view k) const noexcept; + + friend mailto_rule_t; +}; + +/** Rule to match a mailto URL +*/ +struct mailto_rule_t +{ + /// Value type returned by the rule + using value_type = mailto_view; + + /// Parse a sequence of characters into a mailto_view + boost::system::result< value_type > + parse( char const*& it, char const* end ) const noexcept; +}; + +constexpr mailto_rule_t mailto_rule{}; + +/** Return a parsed mailto URL from a string, or error. + + This is a more convenient user-facing function + to parse mailto URLs. 
+*/ +boost::system::result< mailto_view > +parse_mailto( boost::core::string_view s ) noexcept +{ + return urls::grammar::parse(s, mailto_rule); +} + +int main(int argc, char** argv) +{ + // This example shows how to use custom parsing + // to process alternate URI schemes, in this + // case "mailto" + if (argc != 2) { + std::cout << argv[0] << "\n"; + std::cout << "mailto \n" + "examples:\n" + // Single e-mail address + "mailto mailto:someone@example.com\n" + // Two e-mail addresses + "mailto mailto:someone@example.com,someoneelse@example.com\n" + // E-mail headers + "mailto mailto:someone@example.com?subject=Our%20meeting&cc=someone_else@example.com&body=Hi%21\n" + // E-mail headers only + "mailto mailto:?to=&subject=mailto%20example&body=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FMailto\n" + // All fields + "mailto mailto:someone@example.com,%73omeoneelse@me.com?to=thirdperson@example.com&subject=Our%20meeting&cc=someone_else@example.com,onemore@ex%61mple.com&body=Hi%21\n"; + return EXIT_FAILURE; + } + + // Parse the argument with the scheme-specific rule; fail on invalid input + boost::system::result<mailto_view> r = + parse_mailto(argv[1]); + if (!r) + return EXIT_FAILURE; + + mailto_view m = *r; + std::cout << "link: " << m << "\n"; + + // Addresses from the path and from any "to" header + for (std::size_t i = 0; i < m.size(); ++i) + std::cout << + "to[" << i << "]: " << + m.address(i) << "\n"; + + // Addresses from any "cc" header + for (std::size_t i = 0; i < m.size_cc(); ++i) + std::cout << + "cc[" << i << "]: " << + m.cc(i) << "\n"; + + std::cout << "subject: " << m.subject() << "\n"; + std::cout << "body: " << m.body() << "\n"; + + return EXIT_SUCCESS; +} + +std::string +mailto_view::address(std::size_t i) const +{ + return encoded_address(i).decode(); +} + +urls::pct_string_view +mailto_view::encoded_address(std::size_t i) const noexcept +{ + // Look for ith email address in the path string + auto s = get_nth_address(u_.encoded_path(), i); + if (s) + return *s; + + // Look for ith email address in one of the "to" headers + auto ps = u_.encoded_params(); + auto it = ps.find("to", urls::ignore_case); + while (it != ps.end()) + { 
+ s = get_nth_address((*it++).value, i); + if (s) + return *s; + it = ps.find(it, "to", urls::ignore_case); + } + return {}; +} + +std::size_t +mailto_view::size() const noexcept +{ + // Count addresses in path + std::size_t n = addr_in_str(u_.encoded_path()); + + // Count addresses in "to" headers + auto ps = u_.encoded_params(); + auto it = ps.find("to", urls::ignore_case); + while (it != ps.end()) + { + n += addr_in_str((*it++).value); + it = ps.find(it, "to", urls::ignore_case); + } + return n; +} + +std::string +mailto_view::cc(std::size_t i) const +{ + return encoded_cc(i).decode(); +} + +urls::pct_string_view +mailto_view::encoded_cc(std::size_t i) const noexcept +{ + // Look for ith email address in one of the "cc" headers + auto ps = u_.encoded_params(); + auto it = ps.find("cc", urls::ignore_case); + while (it != ps.end()) + { + auto s = get_nth_address((*it++).value, i); + if (s) + return *s; + it = ps.find(it, "cc", urls::ignore_case); + } + return {}; +} + +std::size_t +mailto_view::size_cc() const noexcept +{ + // Count addresses in "cc" headers + std::size_t n = 0; + auto ps = u_.encoded_params(); + auto it = ps.find("cc", urls::ignore_case); + while (it != ps.end()) + { + n += addr_in_str((*it++).value); + it = ps.find(it, "cc", urls::ignore_case); + } + return n; +} + +std::string +mailto_view::subject() const +{ + return encoded_subject().decode(); +} + +urls::pct_string_view +mailto_view::encoded_subject() const noexcept +{ + return param_or_empty("subject"); +} + +std::string +mailto_view::body() const +{ + return encoded_body().decode(); +} + +urls::pct_string_view +mailto_view::encoded_body() const noexcept +{ + return param_or_empty("body"); +} + +std::size_t +mailto_view::addr_in_str(boost::core::string_view s) +{ + // Count the non-empty comma-separated pieces of s + std::size_t n = 0; + bool empty = true; + for (char c : s) + { + if (c == ',') + { + n += !empty; + empty = true; + } + else + { + empty = false; + } + } + n += !empty; + return n; +} + +boost::optional 
+mailto_view::get_nth_address(boost::core::string_view to, std::size_t &i) noexcept +{ + auto p = to.find(','); + while (p != boost::core::string_view::npos) + { + if (i == 0) + return urls::pct_string_view( + to.substr(0, p)); + --i; + to.remove_prefix(p + 1); + p = to.find(','); + } + if (!to.empty()) + { + if (i == 0) + return urls::pct_string_view( + to.substr(0, p)); + --i; + } + return boost::none; +} + +urls::pct_string_view +mailto_view::param_or_empty(urls::pct_string_view k) const noexcept +{ + auto ps = u_.encoded_params(); + auto it = ps.find(k, urls::ignore_case); + if (it != ps.end()) + return (*it).value; + return {}; +} + +auto +mailto_rule_t::parse( char const*& it, char const* end ) const noexcept + -> boost::system::result< value_type > +{ + // Syntax-based rules + boost::system::result r = + urls::grammar::parse(it, end, urls::absolute_uri_rule); + if (!r) + return r.error(); + + // Scheme-based rules + mailto_view m; + m.u_ = *r; + auto valid_header = [](urls::param_pct_view p) { + return + urls::grammar::parse(p.key, hfname_rule) && + urls::grammar::parse(p.value, hfvalue_rule) && + p.has_value && + (!urls::grammar::ci_is_equal(p.key, "to") || + urls::grammar::parse(p.value, addr_spec_rule)); + }; + auto ps = m.u_.encoded_params(); + if (m.u_.scheme() == "mailto" && + !m.u_.has_authority() && + urls::grammar::parse(m.u_.encoded_path(), to_rule) && + std::all_of(ps.begin(), ps.end(), valid_header)) + return m; + return urls::grammar::error::invalid; +} +---- + + diff --git a/doc/modules/ROOT/pages/examples/qrcode.adoc b/doc/modules/ROOT/pages/examples/qrcode.adoc new file mode 100644 index 00000000..5668654a --- /dev/null +++ b/doc/modules/ROOT/pages/examples/qrcode.adoc @@ -0,0 +1,105 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += QR Code + +A QR code is a machine-readable two-dimensional barcode. They might contain data +for a identifier or a URL to a website. + +This example shows how to construct and modify URLs to consume a third party API to +generate QR Codes. + +// example_qrcode +[source,cpp] +---- + +/* + This example shows how to construct and modify + URLs to consume a third party API to + generate QR Codes. + https://developers.google.com/chart/infographics/docs/qr_codes +*/ + +#include +#include +#include +#include + +namespace urls = boost::urls; +namespace core = boost::core; + +int main(int argc, char** argv) +{ + if (argc < 2) { + std::cout << argv[0] << "\n"; + std::cout << "Usage: qrcode \n" + "options:\n" + " : The data to encode (required)\n" + " : Image width (default: 100)\n" + " : Image height (default: width)\n" + " : UTF-8, Shift_JIS, ISO-8859-1 (default: utf8)\n" + " : percentage of error correction (default: 7)\n" + " : border width (default: 4)\n" + "examples:\n" + "qrcode \"Hello world\"\n"; + return EXIT_FAILURE; + } + + urls::url u = + urls::parse_uri( + "https://chart.googleapis.com/chart?cht=qr").value(); + auto ps = u.params(); + + // Data + ps.append({"chl", argv[1]}); + + // Size + std::size_t width = argc < 3 ? 100 : std::stoll(argv[2]); + std::size_t height = argc < 4 ? width : std::stoll(argv[3]); + ps.append({"chs", std::to_string(width) + "x" + std::to_string(height)}); + + // Encoding + if (argc >= 5) + { + core::string_view output_encoding = + core::string_view(argv[3]) == "Shift_JIS" || + core::string_view(argv[3]) == "ISO-8859-1" ? 
+ argv[4] : "UTF-8"; + ps.append({"choe", output_encoding}); + } + + // Error + if (argc >= 6) + { + std::size_t err = std::stoll(argv[5]); + std::string chld; + if (err < 11) + chld = "L"; + else if (err < 20) + chld = "M"; + else if (err < 27) + chld = "Q"; + else + chld = "H"; + std::size_t margin = argc < 7 ? 4 : std::stoll(argv[6]); + chld += "|"; + chld += std::to_string(margin); + ps.append({"chld", chld}); + } + + std::cout << u << '\n'; + + return EXIT_SUCCESS; +} +---- + + diff --git a/doc/modules/ROOT/pages/examples/router.adoc b/doc/modules/ROOT/pages/examples/router.adoc new file mode 100644 index 00000000..734f9aec --- /dev/null +++ b/doc/modules/ROOT/pages/examples/router.adoc @@ -0,0 +1,333 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += Router + +This example defines a router for URL paths. If the specified route matches one of the existing +routes, the example executes the underlying callback function. + +// example_router +[source,cpp] +---- + +/* + This example defines a router for URL paths. + Each path is associated with a callback + function. 
+*/ + +#ifndef BOOST_URL_SOURCE +#define BOOST_URL_SOURCE +#endif + +#include "router.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +namespace urls = boost::urls; +namespace core = boost::core; +namespace asio = boost::asio; +namespace beast = boost::beast; +namespace http = beast::http; +using string_view = core::string_view; +using request_t = http::request; +struct connection; +using handler = std::function; + +int +serve( + urls::router const& r, + asio::ip::address const& a, + unsigned short port, + std::string const& doc_root); + +struct connection +{ + connection(asio::io_context& ioc) + : socket(ioc) {} + + void + string_reply(core::string_view msg); + + void + file_reply(core::string_view path); + + void + error_reply(http::status, core::string_view msg); + + beast::error_code ec; + asio::ip::tcp::socket socket; + std::string doc_root; + request_t req; +}; + +int +main(int argc, char **argv) +{ + /* + * Parse cmd-line params + */ + if (argc != 4) + { + core::string_view exec = argv[0]; + auto file_pos = exec.find_last_of("/\\"); + if (file_pos != core::string_view::npos) + exec = exec.substr(file_pos + 1); + std::cerr + << "Usage: " << exec + << "
\n" + "Example: " << exec << " 0.0.0.0 8080 .\n" + "Default values:\n" + "- address: 0.0.0.0\n" + "- port: 8080\n" + "- doc_root: ./\n"; + } + auto const address = asio::ip::make_address(argc > 1 ? argv[1] : "0.0.0.0"); + auto const port = static_cast(argc > 2 ? std::atoi(argv[2]) : 8080); + auto const doc_root = std::string(argc > 3 ? argv[3] : "."); + + /* + * Create router + */ + urls::router r; + + r.insert("/", [&](connection& c, urls::matches const&) { + c.string_reply("Hello!"); + }); + + r.insert("/user/{name}", [&](connection& c, urls::matches const& m) { + std::string msg = "Hello, "; + urls::pct_string_view(m[0]).decode({}, urls::string_token::append_to(msg)); + msg += "!"; + c.string_reply(msg); + }); + + r.insert("/user", [&](connection& c, urls::matches const&) { + std::string msg = "Users: "; + auto names = {"johndoe", "maria", "alice"}; + for (auto name: names) { + msg += ""; + msg += name; + msg += " "; + } + c.string_reply(msg); + }); + + r.insert("/public/{path+}", [&](connection& c, urls::matches m) { + c.file_reply(m["path"]); + }); + + return serve(r, address, port, doc_root); +} + +#define ROUTER_CHECK(cond) if(!(cond)) { break; } +#define ROUTER_CHECK_EC(ec, cat) if(ec.failed()) { std::cerr << #cat << ": " << ec.message() << "\n"; break; } + +int +serve( + urls::router const& r, + asio::ip::address const& address, + unsigned short port, + std::string const& doc_root) +{ + /* + * Serve the routes with a simple synchronous + * server. This is an implementation detail + * in the context of this example. 
+ */ + std::cout << "Listening on http://" << address << ":" << port << "\n"; + asio::io_context ioc(1); + asio::ip::tcp::acceptor acceptor(ioc, {address, port}); + urls::matches m; + for(;;) + { + connection c(ioc); + c.doc_root = doc_root; + acceptor.accept(c.socket); + beast::flat_buffer buffer; + for(;;) + { + // Read a request + http::read(c.socket, buffer, c.req, c.ec); + ROUTER_CHECK(c.ec != http::error::end_of_stream) + ROUTER_CHECK_EC(c.ec, read) + // Handle request + auto rpath = urls::parse_path(c.req.target()); + if (c.req.method() != http::verb::get && + c.req.method() != http::verb::head) + c.error_reply( + http::status::bad_request, + std::string("Unknown HTTP-method: ") + + std::string(c.req.method_string())); + else if (!rpath) + c.error_reply(http::status::bad_request, "Illegal request-target"); + else if (auto h = r.find(*rpath, m)) + (*h)(c, m); + else + c.error_reply( + http::status::not_found, + "The resource '" + + std::string(rpath->buffer()) + + "' was not found."); + ROUTER_CHECK_EC(c.ec, write) + ROUTER_CHECK(c.req.keep_alive()) + } + c.socket.shutdown(asio::ip::tcp::socket::shutdown_send, c.ec); + } + return EXIT_SUCCESS; +} + +#undef ROUTER_CHECK_EC +#undef ROUTER_CHECK + +void +connection:: +error_reply(http::status s, core::string_view msg) +{ + // invalid route + http::response res{s, req.version()}; + res.set(http::field::server, BOOST_BEAST_VERSION_STRING); + res.set(http::field::content_type, "text/html"); + res.keep_alive(req.keep_alive()); + res.body() = msg; + res.prepare_payload(); + http::write(socket, res, ec); +} + + +void +connection:: +string_reply(core::string_view msg) +{ + http::response res{http::status::ok, req.version()}; + res.set(http::field::server, BOOST_BEAST_VERSION_STRING); + res.set(http::field::content_type, "text/html"); + res.keep_alive(req.keep_alive()); + res.body() = msg; + res.prepare_payload(); + http::write(socket, res, ec); +} + +core::string_view +mime_type(core::string_view path); + +std::string 
+path_cat( + beast::string_view base, + beast::string_view path); + +void +connection:: +file_reply(core::string_view path) +{ + http::file_body::value_type body; + std::string jpath = path_cat(doc_root, path); + body.open(jpath.c_str(), beast::file_mode::scan, ec); + if(ec == beast::errc::no_such_file_or_directory) + { + error_reply( + http::status::not_found, + "The resource '" + std::string(path) + + "' was not found in " + jpath); + return; + } + auto const size = body.size(); + http::response res{ + std::piecewise_construct, + std::make_tuple(std::move(body)), + std::make_tuple(http::status::ok, req.version())}; + res.set(http::field::server, BOOST_BEAST_VERSION_STRING); + res.set(http::field::content_type, mime_type(path)); + res.content_length(size); + res.keep_alive(req.keep_alive()); + http::write(socket, res, ec); +} + +// Append an HTTP rel-path to a local filesystem path. +// The returned path is normalized for the platform. +std::string +path_cat( + core::string_view base, + core::string_view path) +{ + if (base.empty()) + return std::string(path); + std::string result(base); +#ifdef BOOST_MSVC + char constexpr path_separator = '\\'; +#else + char constexpr path_separator = '/'; +#endif + if( result.back() == path_separator && + path.starts_with(path_separator)) + result.resize(result.size() - 1); + else if (result.back() != path_separator && + !path.starts_with(path_separator)) + { + result.push_back(path_separator); + } + result.append(path.data(), path.size()); +#ifdef BOOST_MSVC + for(auto& c : result) + if(c == '/') + c = path_separator; +#endif + return result; +} + +core::string_view +mime_type(core::string_view path) +{ + using beast::iequals; + auto const ext = [&path] + { + auto const pos = path.rfind("."); + if(pos == beast::string_view::npos) + return beast::string_view{}; + return path.substr(pos); + }(); + if(iequals(ext, ".htm")) return "text/html"; + if(iequals(ext, ".html")) return "text/html"; + if(iequals(ext, ".php")) return 
"text/html"; + if(iequals(ext, ".css")) return "text/css"; + if(iequals(ext, ".txt")) return "text/plain"; + if(iequals(ext, ".js")) return "application/javascript"; + if(iequals(ext, ".json")) return "application/json"; + if(iequals(ext, ".xml")) return "application/xml"; + if(iequals(ext, ".swf")) return "application/x-shockwave-flash"; + if(iequals(ext, ".flv")) return "video/x-flv"; + if(iequals(ext, ".png")) return "image/png"; + if(iequals(ext, ".jpe")) return "image/jpeg"; + if(iequals(ext, ".jpeg")) return "image/jpeg"; + if(iequals(ext, ".jpg")) return "image/jpeg"; + if(iequals(ext, ".gif")) return "image/gif"; + if(iequals(ext, ".bmp")) return "image/bmp"; + if(iequals(ext, ".ico")) return "image/vnd.microsoft.icon"; + if(iequals(ext, ".tiff")) return "image/tiff"; + if(iequals(ext, ".tif")) return "image/tiff"; + if(iequals(ext, ".svg")) return "image/svg+xml"; + if(iequals(ext, ".svgz")) return "image/svg+xml"; + return "application/text"; +} +---- + + + diff --git a/doc/modules/ROOT/pages/examples/sanitize.adoc b/doc/modules/ROOT/pages/examples/sanitize.adoc new file mode 100644 index 00000000..81aec761 --- /dev/null +++ b/doc/modules/ROOT/pages/examples/sanitize.adoc @@ -0,0 +1,471 @@ +// +// Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + + + + += Sanitizing URLs + +This example parses a non-strict or invalid URL +into path components according to its delimiters. +This pattern can be adapted to the requirements of other +applications. + +Once the non-strict components are determined, a new URL is +created and its parts are set with the `set_encoded_X` +functions, which will encode any invalid chars accordingly. 
+ +This sort of transformation is useful in applications that are +extremely loose in what kinds of URLs they accept, such as +browsers. The sanitized URL can later be used for machine-to-machine +communication. + +Using non-strict URLs directly is a security concern in +machine-to-machine communication, is ambiguous, and also +involves an extra cost for the transformations. + +Different transformations are required by different applications to +construct a valid URL appropriate for machine-to-machine communication. +For instance, if an invalid relative reference includes something that +looks like a host in the first path segment, browsers usually interpret +that as the host with an implicit "https" scheme. Other applications +also have other implicit schemes. + +The example also identifies whether the input URL is already valid. +It includes diagnostics that can be used to help the user determine +if a URL is invalid and why it's invalid. + +Once all transformations are applied, the result is a URL +appropriate for machine-to-machine communication. + +// example_sanitize_url +[source,cpp] +---- + +/* + This example parses a non-strict / invalid URL + into path components according to its delimiters. + This pattern can be adapted to the requirements of other + applications. + + Once the non-strict components are determined, a new URL is + created and its parts are set with the set_encoded_X + functions, which will encode any invalid chars accordingly. + + This sort of transformation is useful in applications that are + extremely loose in what kinds of URLs they accept, such as + browsers. The sanitized URL can later be used for machine-to-machine + communication. + + Using non-strict URLs directly is a security concern in + machine-to-machine communication, is ambiguous, and also + involves an extra cost for the transformations. 
+ + Different transformations are required by different applications to + construct a valid URL appropriate for machine-to-machine communication. + For instance, if an invalid relative reference includes something that + looks like a host in the first path segment, browsers usually interpret + that as the host with an implicit "https" scheme. Other applications + also have other implicit schemes. + + The example also identifies whether the input url is already valid. + It includes diagnostics that can be used to help the user determine + if a URL is invalid and why it's invalid. + + Once all transformations are applied, the result is a URL + appropriate for machine-to-machine communication. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace urls = boost::urls; +namespace core = boost::core; + +struct url_components +{ + core::string_view scheme; + core::string_view user; + core::string_view password; + core::string_view hostname; + core::string_view port; + core::string_view path; + core::string_view query; + core::string_view fragment; +}; + +core::string_view +port_of_scheme(core::string_view scheme_str) { + static std::array, 21> scheme_ports = + {{ + {"http", "80"}, + {"ftp", "21"}, + {"https", "443"}, + {"gopher", "70"}, + {"ldap", "389"}, + {"nntp", "119"}, + {"snews", "563"}, + {"imap", "143"}, + {"pop", "110"}, + {"sip", "5060"}, + {"rtsp", "554"}, + {"wais", "210"}, + {"z39.50r", "210"}, + {"z39.50s", "210"}, + {"prospero", "191"}, + {"nfs", "2049"}, + {"tip", "3372"}, + {"acap", "674"}, + {"telnet", "23"}, + {"ssh", "22"}, + {"", "65535"} + }}; + + auto iequals = [](core::string_view a, core::string_view b) + { + if (b.size() != a.size()) { + return false; + } + for (unsigned int i = 0; i < a.size(); ++i) { + if (std::tolower(a[i]) != std::tolower(b[i])) { + return false; + } + } + return true; + }; + + auto const& it = std::find_if( + scheme_ports.begin(), + scheme_ports.end(), 
+ [&](std::pair const& s) { + return iequals(s.first, scheme_str); + }); + + if (it != scheme_ports.end()) { + return it->second; + } else { + return {}; + } +} + +void +extract_relative_ref( + core::string_view hostinfo_relative, + url_components &out) +{ + // split path and query#fragment + constexpr urls::grammar::lut_chars path_end_chars("?#\0"); + auto it = urls::grammar::find_if( + hostinfo_relative.begin(), + hostinfo_relative.end(), path_end_chars); + core::string_view query_and_frag = hostinfo_relative.substr(it - hostinfo_relative.begin()); + if (query_and_frag != hostinfo_relative) + out.path = hostinfo_relative.substr( + 0, query_and_frag.data() - hostinfo_relative.data()); + if (query_and_frag.empty()) + return; + + // ?query#fragment + if (query_and_frag.front() == '?') { + query_and_frag = query_and_frag.substr(1); + core::string_view::size_type hash_pos = query_and_frag.find('#'); + if (hash_pos != core::string_view::npos) { + core::string_view fragment_part = query_and_frag.substr(hash_pos); + out.fragment = fragment_part.substr(1); + out.query = query_and_frag.substr( + 0, fragment_part.data() - query_and_frag.data()); + } else { + out.query = query_and_frag; + } + return; + } + + // fragment + out.fragment = query_and_frag.substr(1); +} + +void +extract_userinfo_relative( + core::string_view relative_ref, + core::string_view userinfo_relative, + core::string_view host_info, + url_components& out) { + // We expect userinfo_relative to point to the first character of + // the hostname. If there's a port it is the first colon, + // except with IPv6. 
+ auto host_end_pos = host_info.find(':'); + if (host_end_pos == core::string_view::npos) + { + // definitely no port + out.hostname = userinfo_relative.substr( + 0, relative_ref.data() - userinfo_relative.data()); + return extract_relative_ref(relative_ref, out); + } + + // extract hostname and port + out.hostname = userinfo_relative.substr(0, host_end_pos); + core::string_view host_relative = userinfo_relative.substr(host_end_pos + 1); + out.port = host_relative.substr(0, relative_ref.data() - host_relative.data()); + + // validate port + bool const valid_port = + urls::grammar::find_if_not( + out.port.begin(), + out.port.end(), + urls::grammar::digit_chars) + == out.port.end(); + if (!valid_port) + { + // move port to hostname where it can be encoded + out.hostname = {out.hostname.begin(), out.port.end()}; + out.port = {}; + } + + extract_relative_ref(relative_ref, out); + if (out.port.empty() && !out.scheme.empty()) + out.port = port_of_scheme(out.scheme); +} + +void +extract_scheme_relative( + core::string_view scheme_relative, + url_components &out) +{ + // hostinfo + constexpr urls::grammar::lut_chars hostinfo_end_chars("/?#\0"); + auto it = urls::grammar::find_if( + scheme_relative.begin(), + scheme_relative.end(), + hostinfo_end_chars); + auto path_offset = (std::min)( + scheme_relative.size(), + static_cast(it - scheme_relative.begin())); + core::string_view host_info = scheme_relative.substr(0, path_offset); + + // userinfo + core::string_view relative_ref = scheme_relative.substr(path_offset); + auto host_offset = host_info.find_last_of('@'); + if (host_offset == core::string_view::npos) + return extract_userinfo_relative( + relative_ref, + scheme_relative, + host_info, + out); + + // password + core::string_view userinfo_at_relative = scheme_relative.substr(host_offset); + core::string_view userinfo(host_info.data(), userinfo_at_relative.data() - host_info.data()); + auto password_offset = std::min(userinfo.size(), userinfo.find(':')); + if 
(password_offset != userinfo.size()) { + out.user = scheme_relative.substr(0, password_offset); + core::string_view password = scheme_relative.substr(password_offset + 1); + out.password = password.substr(0, userinfo_at_relative.data() - password.data()); + } else { + out.user = scheme_relative.substr(0, userinfo_at_relative.data() - scheme_relative.data()); + } + + // userinfo-relative + core::string_view userinfo_relative = userinfo_at_relative.substr(1); + it = urls::grammar::find_if( + userinfo_relative.begin(), + userinfo_relative.end(), + hostinfo_end_chars); + path_offset = (std::min)( + userinfo_relative.size(), + static_cast(it - userinfo_relative.begin())); + host_info = userinfo_relative.substr(0, path_offset); + extract_userinfo_relative( + relative_ref, + userinfo_relative, + host_info, + out); +} + +void +extract_uri_components( + core::string_view s, + url_components &out) +{ + if (s.starts_with("//") && !s.starts_with("///")) + return extract_scheme_relative(s.substr(2), out); + + if (s.starts_with('/')) + return extract_relative_ref(s, out); + + // extract scheme + // first char in a scheme must be letter (we accept uppercase here) + bool has_scheme = false; + if (!s.empty() && urls::grammar::alpha_chars(s.front())) { + constexpr + urls::grammar::lut_chars scheme_chars( + "0123456789+-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + char const* it = urls::grammar::find_if_not( + s.begin() + 1, s.end(), scheme_chars); + size_t scheme_size = (std::min)( + s.size(), static_cast(it - s.begin())); + // scheme must be non-empty and followed by ':' + if (s.size() > scheme_size && s[scheme_size] == ':') { + out.scheme = s.substr(0, scheme_size); + has_scheme = true; + } + } + + // The usual route, parse scheme first + core::string_view scheme_relative = s; + if (has_scheme) + scheme_relative = s.substr(out.scheme.size() + 1); + + const bool has_authority = scheme_relative.starts_with("//"); + const bool is_relative_ref = !has_scheme && 
!has_authority; + if (is_relative_ref) + { + // this is the trick browsers usually apply when 1) there's no + // authority because the "//" is missing, 2) the scheme is also missing, + // and 3) the first path segment looks like an authority + // + // This behavior is widespread, although it's ambiguous because valid + // host characters are also valid path characters. + // + // It's this rule that allows for things like "www.boost.org" in the + // browser. This is an invalid URL because it has no "//" to indicate + // this is the authority and "www.boost.org" is a perfectly valid + // path segment. + auto first_seg_offset = (std::min)(s.size(), s.find_first_of('/')); + core::string_view first_seg = s.substr(0, first_seg_offset); + auto host_delimiter_pos = first_seg.find_first_of(".:"); + bool const looks_like_authority = + urls::parse_authority(first_seg) && + host_delimiter_pos != core::string_view::npos && + host_delimiter_pos != first_seg.size() - 1; + if (looks_like_authority) + return extract_scheme_relative(s, out); + + // if the first_seg is really a seg, parse as relative ref + return extract_relative_ref(s, out); + } + + if (has_authority) + scheme_relative = scheme_relative.substr(2); + + // all that's left is a relative path + return extract_relative_ref(scheme_relative, out); +} + +void +sanitize_uri(core::string_view s, urls::url_base& dest) { + url_components o; + dest.clear(); + extract_uri_components(s, o); + if (o.scheme.data()) + dest.set_scheme(o.scheme); + if (o.user.data()) + dest.set_encoded_user(o.user); + if (o.password.data()) + dest.set_encoded_password(o.password); + if (o.hostname.data()) + dest.set_encoded_host(o.hostname); + if (o.port.data()) + dest.set_port(o.port); + if (o.path.data()) + dest.set_encoded_path(o.path); + if (o.query.data()) + dest.set_encoded_query(o.query); + if (o.fragment.data()) + dest.set_encoded_fragment(o.fragment); +} + +urls::url +sanitize_uri(core::string_view s) { + urls::url u; + sanitize_uri(s, u); + 
return u; +} + +void +print_url_components(urls::url_view u) +{ + std::cout << "url: " << u.buffer() << '\n'; + if (u.has_scheme()) + std::cout << "scheme: " << u.scheme() << '\n'; + if (u.has_userinfo()) + std::cout << "user: " << u.encoded_user() << '\n'; + if (u.has_password()) + std::cout << "password: " << u.encoded_password() << '\n'; + if (u.has_authority()) + std::cout << "hostname: " << u.encoded_host() << '\n'; + if (u.has_port()) + std::cout << "port: " << u.port() << '\n'; + std::cout << "path: " << u.encoded_path() << '\n'; + std::cout << "segments:\n"; + for (auto seg: u.encoded_segments()) + std::cout << "- " << seg << '\n'; + if (u.has_query()) + std::cout << "query: " << u.encoded_query() << '\n'; + std::cout << "params:\n"; + for (auto param: u.encoded_params()) + { + if (param.has_value) + std::cout << "- " << param.key << ": " << param.value << '\n'; + else + std::cout << "- " << param.key << '\n'; + } + if (u.has_fragment()) + std::cout << "fragment: " << u.encoded_fragment() << '\n'; +} + +int +main(int argc, char **argv) +{ + if (argc != 2) + { + // Show only the executable's file name: skip past the last path separator + core::string_view exec = argv[0]; + auto p = exec.find_last_of("/\\"); + if (p != core::string_view::npos) + exec = exec.substr(p + 1); + std::cerr + << "Usage: " << exec + << " \n" + "target: a non-strict url\n"; + return EXIT_FAILURE; + } + + core::string_view uri_str = argv[1]; + + // If the input already parses as a URI reference, report it as valid + boost::system::result<urls::url_view> ru = urls::parse_uri_reference(uri_str); + if (ru) + { + urls::url_view u = *ru; + if (u.has_scheme() && u.has_fragment()) + std::cout << "Input is a valid URL\n"; + else if (u.has_scheme()) + std::cout << "Input is a valid absolute URL\n"; + else + std::cout << "Input is a valid relative URL\n"; + print_url_components(u); + return EXIT_SUCCESS; + } + + // Otherwise split it leniently and rebuild a valid URL + std::cout << "Sanitizing URL:\n"; + std::cout << "input: " << uri_str << '\n'; + urls::url u = sanitize_uri(uri_str); + print_url_components(u); + return EXIT_SUCCESS; +} +---- + + diff --git a/doc/modules/ROOT/pages/grammar/charset.adoc 
b/doc/modules/ROOT/pages/grammar/charset.adoc index e50b4d60..3d19985b 100644 --- a/doc/modules/ROOT/pages/grammar/charset.adoc +++ b/doc/modules/ROOT/pages/grammar/charset.adoc @@ -8,9 +8,9 @@ // -== Character Sets += Character Sets -A __character__ represents a subset of low-ASCII characters, +A __character set__ represents a subset of low-ASCII characters, used as a building block for constructing rules. The library models them as callable predicates invocable with this equivalent signature: @@ -22,14 +22,20 @@ bool( char ch ) const noexcept; ---- -The `CharSet` concept describes the requirements on +The __CharSet__ concept describes the requirements on syntax and semantics for these types. Here we declare a character set type that includes the horizontal and vertical whitespace characters: [source,cpp] ---- -// code_grammar_2_2 +struct ws_chars_t +{ + constexpr bool operator()( char c ) const noexcept + { + return c == '\t' || c == ' ' || c == '\r' || c == '\n'; + } +}; ---- @@ -38,7 +44,7 @@ the requirements: [source,cpp] ---- -// code_grammar_2_3 +static_assert( is_charset< ws_chars_t >::value, "CharSet requirements not met" ); ---- @@ -49,7 +55,7 @@ abstraction: [source,cpp] ---- -// code_grammar_2_4 +constexpr ws_chars_t ws_chars{}; ---- @@ -65,50 +71,73 @@ character: [source,cpp] ---- -// code_grammar_2_5 +core::string_view get_token( core::string_view s ) noexcept +{ + auto it0 = s.data(); + auto const end = it0 + s.size(); + + // find the first non-whitespace character + it0 = find_if_not( it0, end, ws_chars ); + + if( it0 == end ) + { + // all whitespace or empty string + return {}; + } + + // find the next whitespace character + auto it1 = find_if( it0, end, ws_chars ); + + // [it0, it1) is the part we want + return core::string_view( it0, it1 - it0 ); +} ---- The function can now be called thusly: +// code_grammar_2_6 [source,cpp] ---- -// code_grammar_2_6 +assert( get_token( " \t john-doe\r\n \t jane-doe\r\n") == "john-doe" ); ---- The library 
provides these often-used character sets: -// [table Character Sets [ -// [Value] -// [Description] -// ][ -// [[link url.ref.boost__urls__grammar__alnum_chars `alnum_chars`]] -// [ -// Contains the uppercase and lowercase letters, and digits. -// ] -// ][ -// [[link url.ref.boost__urls__grammar__alpha_chars `alpha_chars`]] -// [ -// Contains the uppercase and lowercase letters. -// ] -// ][ -// [[link url.ref.boost__urls__grammar__digit_chars `digit_chars`]] -// [ -// Contains the decimal digit characters. -// ] -// ][ -// [[link url.ref.boost__urls__grammar__hexdig_chars `hexdig_chars`]] -// [ -// Contains the uppercase and lowercase hexadecimal -// digit characters. -// ] -// ][ -// [[link url.ref.boost__urls__grammar__vchars `vchars`]] -// [ -// Contains the visible characters (i.e. non whitespace). -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Value|Description + +// Row 1, Column 1 +|`alnum_chars` +// Row 1, Column 2 +|Contains the uppercase and lowercase letters, and digits. + +// Row 2, Column 1 +|`alpha_chars` +// Row 2, Column 2 +|Contains the uppercase and lowercase letters. + +// Row 3, Column 1 +|`digit_chars` +// Row 3, Column 2 +|Contains the decimal digit characters. + +// Row 4, Column 1 +|`hexdig_chars` +// Row 4, Column 2 +|Contains the uppercase and lowercase hexadecimal + digit characters. + +// Row 5, Column 1 +|`vchars` +// Row 5, Column 2 +|Contains the visible characters (i.e. non whitespace). + +|=== + Some of the character sets in the library have implementations @@ -119,18 +148,19 @@ using Streaming SIMD Extensions 2 (https://en.wikipedia.org/wiki/SSE2[SSE2,window=blank_]), available on all x86 and x64 architectures. -=== The lut_chars Type +== The lut_chars Type -The `lut_chars` type satisfies the `CharSet` +The `lut_chars` type satisfies the __CharSet__ requirements and offers an optimized `constexpr` implementation which provides enhanced performance and notational convenience for specifying character sets. 
Compile-time instances can be constructed from strings: +// code_grammar_2_7 [source,cpp] ---- -// code_grammar_2_7 +constexpr lut_chars vowels = "AEIOU" "aeiou"; ---- @@ -138,9 +168,10 @@ We can use `operator+` and `operator-` notation to add and remove elements from the set at compile time. For example, sometimes the character 'y' sounds like a vowel: +// code_grammar_2_8 [source,cpp] ---- -// code_grammar_2_8 +constexpr auto vowels_and_y = vowels + 'y' + 'Y'; ---- @@ -151,7 +182,14 @@ Here we create the set of visible characters using a lambda: [source,cpp] ---- -// code_grammar_2_9 +struct is_visible +{ + constexpr bool operator()( char ch ) const noexcept + { + return ch >= 33 && ch <= 126; + } +}; +constexpr lut_chars visible_chars( is_visible{} ); // (since C++11) ---- @@ -159,23 +197,25 @@ Alternatively: [source,cpp] ---- -// code_grammar_2_10 +constexpr lut_chars visible_chars( [](char ch) { return ch >= 33 && ch <= 126; } ); // (since C++17) ---- Differences can be calculated with `operator-`: +// code_grammar_2_11 [source,cpp] ---- -// code_grammar_2_11 +constexpr auto visible_non_vowels = visible_chars - vowels; ---- We can also remove individual characters: +// code_grammar_2_12 [source,cpp] ---- -// code_grammar_2_12 +constexpr auto visible_non_vowels_or_y = visible_chars - vowels - 'y'; ---- diff --git a/doc/modules/ROOT/pages/grammar/combinators.adoc b/doc/modules/ROOT/pages/grammar/combinators.adoc index 49c6ef4f..faadc76d 100644 --- a/doc/modules/ROOT/pages/grammar/combinators.adoc +++ b/doc/modules/ROOT/pages/grammar/combinators.adoc @@ -8,21 +8,21 @@ // -== Compound Rules += Compound Rules The rules shown so far have defined -https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols[__terminal__,window=blank_], +https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols[__terminal symbols__,window=blank_], representing indivisible units of grammar. 
To parse more complex things, a -https://en.wikipedia.org/wiki/Parser_combinator[__parser__,window=blank_] -(or __compound__) is a rule which accepts as parameters one +https://en.wikipedia.org/wiki/Parser_combinator[__parser combinator__,window=blank_] +(or __compound rule__) is a rule which accepts as parameters one or more rules and combines them to form a higher order algorithm. In this section we introduce the compound rules provided by the library, and how they may be used to express more complex grammars. -=== Tuple Rule +== Tuple Rule Consider the following grammar: @@ -37,9 +37,10 @@ or more specified rules in sequence. The following defines a sequence using some character literals and two decimal octets, which is a fancy way of saying a number between 0 and 255: +// code_grammar_3_1 [source,cpp] ---- -// code_grammar_3_1 +constexpr auto version_rule = tuple_rule( delim_rule( 'v' ), dec_octet_rule, delim_rule( '.' ), dec_octet_rule ); ---- @@ -48,22 +49,26 @@ to the value type of each rule specified upon construction. The decimal octets are represented by the `dec_octet_rule` which stores its result in an `unsigned char`: +// code_grammar_3_2 [source,cpp] ---- -// code_grammar_3_2 +system::result< std::tuple< core::string_view, unsigned char, core::string_view, unsigned char > > rv = parse( "v42.44800", version_rule ); ---- -To extract elements from `std::tuple` the function `std::get` +To extract elements from `std::tuple` the function https://en.cppreference.com/w/cpp/utility/tuple/get[`std::get`,window=blank_] must be used. In this case, we don't care to know the value for the matching character literals. The `tuple_rule` discards match results whose value type is `void`. We can use the `squelch` compound rule to convert a matching value type to `void`, and reformulate our rule: +// code_grammar_3_3 [source,cpp] ---- -// code_grammar_3_3 +constexpr auto version_rule = tuple_rule( squelch( delim_rule( 'v' ) ), dec_octet_rule, squelch( delim_rule( '.' 
) ), dec_octet_rule ); + +system::result< std::tuple< unsigned char, unsigned char > > rv = parse( "v42.44800", version_rule ); ---- @@ -71,33 +76,61 @@ When all but one of the value types is `void`, the `std::tuple` is elided and the remaining value type is promoted to the result of the match: +// code_grammar_3_4 [source,cpp] ---- -// code_grammar_3_4 +// port = ":" unsigned-short + +constexpr auto port_rule = tuple_rule( squelch( delim_rule( ':' ) ), unsigned_rule< unsigned short >{} ); + +system::result< unsigned short > rv = parse( ":443", port_rule ); ---- -=== Optional Rule +== Optional Rule BNF elements in brackets denote optional components. These are expressed using `optional_rule`, whose value type is an `optional`. For example, we can adapt the port rule from above to be an optional component: +// code_grammar_3_5 [source,cpp] ---- -// code_grammar_3_5 +// port = [ ":" unsigned-short ] + +constexpr auto port_rule = optional_rule( tuple_rule( squelch( delim_rule( ':' ) ), unsigned_rule< unsigned short >{} ) ); + +system::result< boost::optional< unsigned short > > rv = parse( ":8080", port_rule ); + +assert( rv->has_value() && rv->value() == 8080 ); ---- In this example we build up a rule to represent an endpoint as an IPv4 address with an optional port: +// code_grammar_3_6 [source,cpp] ---- -// code_grammar_3_6 +// ipv4_address = dec-octet "." dec-octet "." dec-octet "." dec-octet +// +// port = ":" unsigned-short +// +// endpoint = ipv4_address [ port ] + +constexpr auto endpoint_rule = tuple_rule( + tuple_rule( + dec_octet_rule, squelch( delim_rule( '.' ) ), + dec_octet_rule, squelch( delim_rule( '.' ) ), + dec_octet_rule, squelch( delim_rule( '.' 
) ), + dec_octet_rule ), + optional_rule( + tuple_rule( + squelch( delim_rule( ':' ) ), + unsigned_rule< unsigned short >{} ) ) ); ---- @@ -105,19 +138,27 @@ This can be simplified; the library provides `ipv4_address_rule` whose result type is `ipv4_address`, offering more utility than representing the address simply as a collection of four numbers: +// code_grammar_3_7 [source,cpp] ---- -// code_grammar_3_7 +constexpr auto endpoint_rule = tuple_rule( + ipv4_address_rule, + optional_rule( + tuple_rule( + squelch( delim_rule( ':' ) ), + unsigned_rule< unsigned short >{} ) ) ); + +system::result< std::tuple< ipv4_address, boost::optional< unsigned short > > > rv = parse( "192.168.0.1:443", endpoint_rule ); ---- -=== Variant Rule +== Variant Rule BNF elements separated by unquoted slashes represent a set of alternatives from which one element may match. We represent -them using `variant_rule`, whose value type is a `variant`. +them using `variant_rule`, whose value type is a variant. Consider the following HTTP production rule which comes from https://datatracker.ietf.org/doc/html/rfc7230#section-5.3[rfc7230,window=blank_]: @@ -135,9 +176,16 @@ define the rule, using `origin_form_rule`, `absolute_uri_rule`, and `authority_rule` which come with the library, and obtain a result from parsing a string: +// code_grammar_3_8 [source,cpp] ---- -// code_grammar_3_8 +constexpr auto request_target_rule = variant_rule( + origin_form_rule, + absolute_uri_rule, + authority_rule, + delim_rule('*') ); + +system::result< variant2::variant< url_view, url_view, authority_view, core::string_view > > rv = parse( "/results.htm?page=4", request_target_rule ); ---- diff --git a/doc/modules/ROOT/pages/grammar/index.adoc b/doc/modules/ROOT/pages/grammar/index.adoc index e25f8953..b033c9ab 100644 --- a/doc/modules/ROOT/pages/grammar/index.adoc +++ b/doc/modules/ROOT/pages/grammar/index.adoc @@ -8,7 +8,8 @@ // -// [section:grammar Customization] +[#grammar] += Customization For a wide range of 
applications the library's container interfaces are sufficient for URLs using the generic syntax or the well known @@ -17,7 +18,7 @@ to go beyond what the library offers: * Create new custom containers for other schemes * Incorporate the parsing of URLs in an enclosing grammar -* Parse `rfc3986` elements in non-URL contexts +* Parse https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_] elements in non-URL contexts (`authority_view` is an example of this). * Define new ABNF rules used to parse non-URL strings @@ -49,22 +50,26 @@ understanding of this notation is necessary to achieve best results for learning how to use the custom parsing features. -// [note -// Code samples and identifiers in this customization -// section are written as if the following declarations -// are in effect: -// -// ``` -// #include -// -// using namespace ::boost::urls::grammar; -// ``` -// ] +[NOTE] +==== +Code samples and identifiers in this customization +section are written as if the following declarations +are in effect: + +[source,cpp] +---- +#include + +using namespace ::boost::urls::grammar; +---- + +==== + + + + + + -// [include 4.1.rules.qbk] -// [include 4.2.charset.qbk] -// [include 4.3.combinators.qbk] -// [include 4.4.range.qbk] -// [include 4.5.rfc3986.qbk] diff --git a/doc/modules/ROOT/pages/grammar/range.adoc b/doc/modules/ROOT/pages/grammar/range.adoc index a6734f0e..6d7a9c78 100644 --- a/doc/modules/ROOT/pages/grammar/range.adoc +++ b/doc/modules/ROOT/pages/grammar/range.adoc @@ -8,7 +8,7 @@ // -== Ranges += Ranges @@ -31,9 +31,11 @@ rule allowing for a prescribed number of repetitions of a specified rule. 
The following rule matches the grammar for __chunk-ext__ defined above: +// code_grammar_4_1 [source,cpp] ---- -// code_grammar_4_1 +constexpr auto chunk_ext_rule = range_rule( + tuple_rule( squelch( delim_rule( ';' ) ), token_rule( alnum_chars ) ) ); ---- @@ -43,9 +45,13 @@ case, the type is `string_view` because the tuple has one unsquelched element, the `token_rule`. The range can be iterated to produce results, without allocating memory for each element. The following code: +// code_grammar_4_2 [source,cpp] ---- -// code_grammar_4_2 +system::result< range< core::string_view > > rv = parse( ";johndoe;janedoe;end", chunk_ext_rule ); + +for( auto s : rv.value() ) + std::cout << s << "\n"; ---- @@ -77,17 +83,25 @@ the minimum number of repetitions, or both the minimum and maximum number of repetitions. Since our list may not be empty, the following rule perfectly captures the __token-list__ grammar: +// code_grammar_4_3 [source,cpp] ---- -// code_grammar_4_3 +constexpr auto token_list_rule = range_rule( + token_rule( alnum_chars ), + tuple_rule( squelch( delim_rule( ',' ) ), token_rule( alnum_chars ) ), + 1 ); ---- The following code: +// code_grammar_4_4 [source,cpp] ---- -// code_grammar_4_4 +system::result< range< core::string_view > > rv = parse( "johndoe,janedoe,end", token_list_rule ); + +for( auto s : rv.value() ) + std::cout << s << "\n"; ---- @@ -102,68 +116,71 @@ end In the next section we discuss the available rules -which are specific to `rfc3986`. +which are specific to https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]. -=== More +== More These are the rules and compound rules provided by the library. For more details please see the corresponding reference sections. -// [table Grammar Symbols [ -// [Name] -// [Description] -// ][ -// [__dec_octet_rule__] -// [ -// Match an integer from 0 and 255. -// ] -// ][ -// [__delim_rule__] -// [ -// Match a character literal. 
-// ] -// ][ -// [__literal_rule__] -// [ -// Match a character string exactly. -// ] -// ][ -// [__not_empty_rule__] -// [ -// Make a matching empty string into an error instead. -// ] -// ][ -// [__optional_rule__] -// [ -// Ignore a rule if parsing fails, leaving -// the input pointer unchanged. -// ] -// ][ -// [__range_rule__] -// [ -// Match a repeating number of elements. -// ] -// ][ -// [__token_rule__] -// [ -// Match a string of characters from a character set. -// ] -// ][ -// [__tuple_rule__] -// [ -// Match a sequence of specified rules, in order. -// ] -// ][ -// [__unsigned_rule__] -// [ -// Match an unsigned integer in decimal form. -// ] -// ][ -// [__variant_rule__] -// [ -// Match one of a set of alternatives specified by rules. -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Name|Description + +// Row 1, Column 1 +|`dec_octet_rule` +// Row 1, Column 2 +|Match an integer from 0 to 255. + +// Row 2, Column 1 +|`delim_rule` +// Row 2, Column 2 +|Match a character literal. + +// Row 3, Column 1 +|`literal_rule` +// Row 3, Column 2 +|Match a character string exactly. + +// Row 4, Column 1 +|`not_empty_rule` +// Row 4, Column 2 +|Make a matching empty string into an error instead. + +// Row 5, Column 1 +|`optional_rule` +// Row 5, Column 2 +|Ignore a rule if parsing fails, leaving + the input pointer unchanged. + +// Row 6, Column 1 +|`range_rule` +// Row 6, Column 2 +|Match a repeating number of elements. + +// Row 7, Column 1 +|`token_rule` +// Row 7, Column 2 +|Match a string of characters from a character set. + +// Row 8, Column 1 +|`tuple_rule` +// Row 8, Column 2 +|Match a sequence of specified rules, in order. + +// Row 9, Column 1 +|`unsigned_rule` +// Row 9, Column 2 +|Match an unsigned integer in decimal form. + +// Row 10, Column 1 +|`variant_rule` +// Row 10, Column 2 +|Match one of a set of alternatives specified by rules. 
+ +|=== + diff --git a/doc/modules/ROOT/pages/grammar/rfc3986.adoc b/doc/modules/ROOT/pages/grammar/rfc3986.adoc index 10aa5270..7001a4ad 100644 --- a/doc/modules/ROOT/pages/grammar/rfc3986.adoc +++ b/doc/modules/ROOT/pages/grammar/rfc3986.adoc @@ -8,7 +8,7 @@ // -== RFC 3986 += RFC 3986 Functions like `parse_uri` are sufficient for converting URLs but they require that the entire string is consumed. When URLs appear as @@ -16,9 +16,9 @@ components of a larger grammar, it is desired to use composition of rules based parsing to process these along with other elements potentially unrelated to resource locators. To achieve this, the library provides rules for the top-level BNF productions found -in `rfc3986` and a rule for matching percent-encoded strings. +in https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_] and a rule for matching percent-encoded strings. -=== Percent Encoding +== Percent Encoding The percent-encoding mechanism is used to represent a data octet in a component when the corresponding character is outside the @@ -58,56 +58,63 @@ equality and comparison to unencoded strings, without allocating memory. In the example below we parse the string `s` as a series of zero or more `pchars`: +// code_grammar_5_1 [source,cpp] ---- -// code_grammar_5_1 +system::result< pct_string_view > rv = parse( s, pct_encoded_rule( pchars ) ); ---- These constants are used and provided by the library to specify rules for percent-encoded URL components: -// [table URL Character Sets [ -// [Name] -// [BNF] -// ][ -// [[link url.ref.boost__urls__gen_delim_chars `gen_delim_chars`]] -// [ -// ``` -// gen-delims = ":" / "/" / "?" 
/ "#" / "[" / "]" / "@" -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__pchars `pchars`]] -// [ -// ``` -// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__reserved_chars `reserved_chars`]] -// [ -// (everything but -// [link url.ref.boost__urls__unreserved_chars `unreserved_chars`]) -// ] -// ][ -// [[link url.ref.boost__urls__sub_delim_chars `sub_delim_chars`]] -// [ -// ``` -// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -// / "*" / "+" / "," / ";" / "=" -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__unreserved_chars `unreserved_chars`]] -// [ -// ``` -// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -// ``` -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Name|BNF -=== URL Rules +// Row 1, Column 1 +|`gen_delim_chars` +// Row 1, Column 2 +|[source] +---- +gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +---- + +// Row 2, Column 1 +|`pchars` +// Row 2, Column 2 +|[source] +---- +pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +---- + +// Row 3, Column 1 +|`reserved_chars` +// Row 3, Column 2 +|(everything but `unreserved_chars`) + +// Row 4, Column 1 +|`sub_delim_chars` +// Row 4, Column 2 +|[source] +---- +sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" +---- + +// Row 5, Column 1 +|`unreserved_chars` +// Row 5, Column 2 +|[source] +---- +unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +---- + +|=== + + +== URL Rules When a URL can appear in the context of a larger grammar, it may be desired to express the enclosing grammar in a single rule that @@ -119,67 +126,90 @@ elements. 
Here we present a rule suitable for parsing the HTTP https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1[__request-line__,window=blank_]: +// code_grammar_5_2 [source,cpp] ---- -// code_grammar_5_2 +// request-line = method SP request-target SP HTTP-version CRLF + +constexpr auto request_line_rule = tuple_rule( + not_empty_rule( token_rule( alpha_chars ) ), // method + squelch( delim_rule( ' ' ) ), // SP + variant_rule( + absolute_uri_rule, // absolute-uri or + relative_ref_rule), // relative-ref + squelch( delim_rule( ' ' ) ), + squelch( literal_rule( "HTTP/" ) ), // "HTTP/" + delim_rule( digit_chars ), // DIGIT + squelch( delim_rule( '.' ) ), // "." + delim_rule( digit_chars ), // DIGIT + squelch( literal_rule( "\r\n" ) ) ); // CRLF ---- The library offers these rules to allow custom rule definitions to integrate the various styles of valid URL rules: -// [table RFC3986 Rules [ -// [Name] -// [BNF] -// ][ -// [[link url.ref.boost__urls__absolute_uri_rule `absolute_uri_rule`]] -// [ -// ``` -// absolute-URI = scheme ":" hier-part [ "?" query ] -// -// hier-part = "//" authority path-abempty -// / path-absolute -// / path-rootless -// / path-empty -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__authority_rule `authority_rule`]] -// [ -// ``` -// authority = [ userinfo "@" ] host [ ":" port ] -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__origin_form_rule `origin_form_rule`]] -// [ -// ``` -// origin-form = absolute-path [ "?" query ] -// -// absolute-path = 1*( "/" segment ) -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__relative_ref_rule `relative_ref_rule`]] -// [ -// ``` -// relative-ref = relative-part [ "?" query ] [ "#" fragment ] -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__uri_reference_rule `uri_reference_rule`]] -// [ -// ``` -// URI-reference = URI / relative-ref -// ``` -// ] -// ][ -// [[link url.ref.boost__urls__uri_rule `uri_rule`]] -// [ -// ``` -// URI = scheme ":" hier-part [ "?" 
query ] [ "#" fragment ] -// ``` -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Name|BNF + +// Row 1, Column 1 +|`absolute_uri_rule` +// Row 1, Column 2 +|[source] +---- +absolute-URI = scheme ":" hier-part [ "?" query ] + +hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty +---- + +// Row 2, Column 1 +|`authority_rule` +// Row 2, Column 2 +|[source] +---- +authority = [ userinfo "@" ] host [ ":" port ] +---- + +// Row 3, Column 1 +|`origin_form_rule` +// Row 3, Column 2 +|[source] +---- +origin-form = absolute-path [ "?" query ] + +absolute-path = 1*( "/" segment ) +---- + +// Row 4, Column 1 +|`relative_ref_rule` +// Row 4, Column 2 +|[source] +---- +relative-ref = relative-part [ "?" query ] [ "#" fragment ] +---- + +// Row 5, Column 1 +|`uri_reference_rule` +// Row 5, Column 2 +|[source] +---- +URI-reference = URI / relative-ref +---- + +// Row 6, Column 1 +|`uri_rule` +// Row 6, Column 2 +|[source] +---- +URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +---- + +|=== + diff --git a/doc/modules/ROOT/pages/grammar/rules.adoc b/doc/modules/ROOT/pages/grammar/rules.adoc index 268c41b9..7a24252a 100644 --- a/doc/modules/ROOT/pages/grammar/rules.adoc +++ b/doc/modules/ROOT/pages/grammar/rules.adoc @@ -8,9 +8,9 @@ // -== Parse Rules += Parse Rules -A `Rule` is an object which tries to match the beginning of +A __Rule__ is an object which tries to match the beginning of an input character buffer against a particular syntax. It returns a `result` containing a value if the match was successful, or an `error_code` if the match failed. 
@@ -24,19 +24,38 @@ parsed sequentially: [source,cpp] ---- -// code_grammar_1_1 +template< class Rule > +auto parse( core::string_view s, Rule const& r) -> system::result< typename Rule::value_type >; + +template< class Rule > +auto parse( char const *& it, char const* end, Rule const& r) -> system::result< typename Rule::value_type >; ---- -To satisfy the `Rule` concept, a `class` or `struct` must declare +To satisfy the __Rule__ concept, a `class` or `struct` must declare the nested type `value_type` indicating the type of value returned upon success, and a `const` member function `parse` with a prescribed signature. In the following code we define a rule that matches a single comma: +// code_grammar_1_2 [source,cpp] ---- -// code_grammar_1_2 +struct comma_rule_t +{ + // The type of value returned upon success + using value_type = core::string_view; + + // The algorithm which checks for a match + system::result< value_type > + parse( char const*& it, char const* end ) const + { + if( it != end && *it == ',') + return core::string_view( it++, 1 ); + + return error::mismatch; + } +}; ---- @@ -44,18 +63,22 @@ Since rules are passed by value, we declare a `constexpr` variable of the type for syntactical convenience. Variable names for rules are usually suffixed with `_rule`: +// code_grammar_1_3 [source,cpp] ---- -// code_grammar_1_3 +constexpr comma_rule_t comma_rule{}; ---- Now we can call `parse` with the string of input and the rule variable thusly: +// code_grammar_1_4 [source,cpp] ---- -// code_grammar_1_4 +system::result< core::string_view > rv = parse( ",", comma_rule ); + +assert( rv.has_value() && rv.value() == "," ); ---- @@ -65,9 +88,10 @@ an unsigned decimal integer. 
Here we construct the rule at run time and specify the type of unsigned integer used to hold the result with a template parameter: +// code_grammar_1_5 [source,cpp] ---- -// code_grammar_1_5 +system::result< unsigned short > rv = parse( "16384", unsigned_rule< unsigned short >{} ); ---- @@ -76,13 +100,14 @@ passed character literal. This is a more general version of the comma rule which we defined earlier. There is also an overload which matches exactly one character from a character set. +// code_grammar_1_6 [source,cpp] ---- -// code_grammar_1_6 +system::result< core::string_view > rv = parse( ",", delim_rule(',') ); ---- -=== Error Handling +== Error Handling When a rule fails to match, or if the rule detects an unrecoverable problem with the input, it returns a @@ -97,7 +122,7 @@ in the input, or to the `end` pointer if all input was consumed. It is the responsibility of library and user-defined -implementations of __compound__ (explained later) +implementations of __compound rules__ (explained later) to rewind their internal pointer if a parsing operation was unsuccessful, and they wish to attempt parsing the same input using a different rule. diff --git a/doc/modules/ROOT/pages/index.adoc b/doc/modules/ROOT/pages/index.adoc index 8883a379..05955dfa 100644 --- a/doc/modules/ROOT/pages/index.adoc +++ b/doc/modules/ROOT/pages/index.adoc @@ -7,16 +7,12 @@ // Official repository: https://github.com/boostorg/url // - -== Boost.URL - - - += Boost.URL Boost.URL is a portable C++ library which provides containers and algorithms which model a "URL," more formally described using the https://datatracker.ietf.org/doc/html/rfc3986[Uniform Resource Identifier (URI),window=blank_] -specification (henceforth referred to as `rfc3986`). A URL is a compact sequence +specification (henceforth referred to as https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]). A URL is a compact sequence of characters that identifies an abstract or physical resource. 
For example, this is a valid URL: @@ -31,7 +27,7 @@ This library understands the grammars related to URLs and provides functionality to validate, parse, examine, and modify urls, and apply normalization or resolution algorithms. -=== Features +== Features While the library is general purpose, special care has been taken to ensure that the implementation and data representation are friendly to network @@ -47,23 +43,25 @@ Boost.URL offers these features: * C++11 as only requirement * Fast compilation, few templates -* Strict compliance with `rfc3986` +* Strict compliance with https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_] * Containers that maintain valid URLs * Parsing algorithms that work without exceptions * Control over storage and allocation for URLs * Support for `-fno-exceptions`, detected automatically * Features that work well on embedded devices -// [note -// Currently the library does not handle -// [@https://www.rfc-editor.org/rfc/rfc3987.html Internationalized Resource Identifiers] (IRIs). -// These are different from URLs, come from Unicode strings instead of -// low-ASCII strings, and are covered by a separate specification. -// ] +[NOTE] +==== +Currently the library does not handle +https://www.rfc-editor.org/rfc/rfc3987.html[Internationalized Resource Identifiers,window=blank_] (IRIs). +These are different from URLs, come from Unicode strings instead of +low-ASCII strings, and are covered by a separate specification. +==== -=== Requirements + +== Requirements The library requires a compiler supporting at least C++11. @@ -79,7 +77,7 @@ desired. -=== Tested Compilers +== Tested Compilers Boost.URL has been tested with the following compilers: @@ -91,7 +89,7 @@ and these architectures: x86, x64, ARM64, S390x. We do not test and support gcc 8.0.1. 
-=== Quality Assurance +== Quality Assurance The development infrastructure for the library includes these per-commit analyses: @@ -102,7 +100,7 @@ these per-commit analyses: -=== Nomenclature +== Nomenclature Various names have been used historically to refer to different flavors of resource identifiers, including __URI__, __URL__, __URN__, @@ -110,13 +108,13 @@ and even __IRI__. Over time, the distinction between URIs and URLs has disappeared when discussed in technical documents and informal works. In this library we use the term **URL** to refer to all strings which are valid according to the -top-level grammar rules found in `rfc3986`. +top-level grammar rules found in https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]. -=== ABNF +== ABNF This documentation uses the Augmented https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form[Backus-Naur Form,window=blank_] @@ -131,7 +129,7 @@ the library. -=== Acknowledgments +== Acknowledgments This library wouldn't be where it is today without the help of https://github.com/pdimov[Peter Dimov,window=blank_] @@ -139,6 +137,6 @@ for design advice and general assistance. -// [include 2.0.quicklook.qbk] + diff --git a/doc/modules/ROOT/pages/quicklook.adoc b/doc/modules/ROOT/pages/quicklook.adoc index 6b901f24..21fdff94 100644 --- a/doc/modules/ROOT/pages/quicklook.adoc +++ b/doc/modules/ROOT/pages/quicklook.adoc @@ -8,19 +8,26 @@ // -== Quick Look += Quick Look This section is intended to give the reader a brief overview of the features and interface style of the library. 
-=== Integration +== Integration + +[NOTE] +==== +Sample code and identifiers used throughout are written as if +the following declarations are in effect: + +[source,cpp] +---- +#include +using namespace boost::urls; +---- + +==== -// [note -// Sample code and identifiers used throughout are written as if -// the following declarations are in effect: -// -// [snippet_headers_3] -// ] We begin by including the library header file which brings all the symbols into scope. @@ -28,7 +35,7 @@ scope. [source,cpp] ---- -// snippet_headers_1 +#include ---- @@ -41,14 +48,15 @@ You must install binaries in a location that can be found by your linker. If you followed the http://www.boost.org/doc/libs/release/more/getting_started/index.html[Boost Getting Started,window=blank_] instructions, that's already been done for you. -=== Parsing +== Parsing Say you have the following URL that you want to parse: +// code_urls_parsing_1 [source,cpp] ---- -// code_urls_parsing_1 +boost::core::string_view s = "https://user:pass@example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe+Jingleheimer%2DSchmidt#page%20anchor"; ---- @@ -59,9 +67,10 @@ The library namespace includes the aliases `string_view`, `error_code`, and You can parse the string by calling this function: +// code_urls_parsing_2 [source,cpp] ---- -// code_urls_parsing_2 +boost::system::result r = parse_uri( s ); ---- @@ -71,17 +80,19 @@ A number of functions are available to parse different types of URL. We can immediately call `result::value` to obtain a `url_view`. 
+// snippet_parsing_3 [source,cpp] ---- -// snippet_parsing_3 +url_view u = r.value(); ---- Or simply +// snippet_parsing_4 [source,cpp] ---- -// snippet_parsing_4 +url_view u = *r; ---- @@ -95,52 +106,85 @@ https://www.boost.org/doc/libs/1_83_0//libs/system/doc/html/system.html#ref_quer https://www.boost.org/doc/libs/1_83_0//libs/system/doc/html/system.html#ref_queries[`result::has_error`,window=blank_] could also be used to check if the string has been parsed without errors. -// [note -// It is worth noting that __parse_uri__ does not allocate any memory dynamically. -// Like a __string_view__, a __url_view__ does not retain ownership of the underlying -// string buffer. -// -// As long as the contents of the original string are unmodified, constructed -// URL views always contain a valid URL in its correctly serialized form. -// -// If the input does not match the URL grammar, an error code -// is reported through __result__ rather than exceptions. -// Exceptions only thrown on excessive input length. -// ] +[NOTE] +==== +It is worth noting that `parse_uri` does not allocate any memory dynamically. +Like a `string_view`, a `url_view` does not retain ownership of the underlying +string buffer. -=== Accessing +As long as the contents of the original string are unmodified, constructed +URL views always contain a valid URL in its correctly serialized form. + +If the input does not match the URL grammar, an error code +is reported through `result` rather than exceptions. +Exceptions are only thrown on excessive input length. 
+==== + + +== Accessing Accessing the parts of the URL is easy: +// snippet_accessing_1 [source,cpp] ---- -// snippet_accessing_1 +url_view u( "https://user:pass@example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe+Jingleheimer%2DSchmidt#page%20anchor" ); +assert(u.scheme() == "https"); +assert(u.authority().buffer() == "user:pass@example.com:443"); +assert(u.userinfo() == "user:pass"); +assert(u.user() == "user"); +assert(u.password() == "pass"); +assert(u.host() == "example.com"); +assert(u.port() == "443"); +assert(u.path() == "/path/to/my-file.txt"); +assert(u.query() == "id=42&name=John Doe Jingleheimer-Schmidt"); +assert(u.fragment() == "page anchor"); ---- URL paths can be further divided into path segments with the function `url_view::segments`. Although URL query strings are often used to represent key/value pairs, this -interpretation is not defined by `rfc3986`. +interpretation is not defined by https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]. Users can treat the query as a single entity. `url_view` provides the function `url_view::params` to extract this view of key/value pairs. -// [table [[Code][Output]] [[ -// [c++] -// [snippet_accessing_1b] -// ][ -// [teletype] -// ``` -// path -// to -// my-file.txt -// -// id: 42 -// name: John Doe Jingleheimer-Schmidt -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_accessing_1b +[source,cpp] +---- +for (auto seg: u.segments()) + std::cout << seg << "\n"; +std::cout << "\n"; + +for (auto param: u.params()) + std::cout << param.key << ": " << param.value << "\n"; +std::cout << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +path +to +my-file.txt + +id: 42 +name: John Doe Jingleheimer-Schmidt +---- + + +|=== + These functions return views referring to substrings and sub-ranges of the underlying URL. 
@@ -148,33 +192,44 @@ By simply referencing the relevant portion of the URL string internally, its components can represent percent-decoded strings and be converted to other types without any previous memory allocation. +// snippet_token_1 [source,cpp] ---- -// snippet_token_1 +std::string h = u.host(); +assert(h == "example.com"); ---- A special `string_token` type can also be used to specify how a portion of the URL should be encoded and returned. +// snippet_token_2 [source,cpp] ---- -// snippet_token_2 +std::string h = "host: "; +u.host(string_token::append_to(h)); +assert(h == "host: example.com"); ---- These functions might also return empty strings +// snippet_accessing_2a [source,cpp] ---- -// snippet_accessing_2a +url_view u1 = parse_uri( "http://www.example.com" ).value(); +assert(u1.fragment().empty()); +assert(!u1.has_fragment()); ---- for both empty and absent components +// snippet_accessing_2b [source,cpp] ---- -// snippet_accessing_2b +url_view u2 = parse_uri( "http://www.example.com/#" ).value(); +assert(u2.fragment().empty()); +assert(u2.has_fragment()); ---- @@ -187,40 +242,80 @@ When applicable, the encoded components can also be directly accessed through a `string_view` without any need to allocate memory: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_accessing_4] -// ][ -// [teletype] -// ``` -// url : https://user:pass@example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe+Jingleheimer%2DSchmidt#page%20anchor -// scheme : https -// authority : user:pass@example.com:443 -// userinfo : user:pass -// user : user -// password : pass -// host : example.com -// port : 443 -// path : /path/to/my%2dfile.txt -// query : id=42&name=John%20Doe+Jingleheimer%2DSchmidt -// fragment : page%20anchor -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output -=== Percent-Encoding +// Row 1, Column 1 +| +// snippet_accessing_4 +[source,cpp] +---- +std::cout << + "url : " << u << "\n" + "scheme : " << u.scheme() << "\n" + "authority : " << 
u.encoded_authority() << "\n" + "userinfo : " << u.encoded_userinfo() << "\n" + "user : " << u.encoded_user() << "\n" + "password : " << u.encoded_password() << "\n" + "host : " << u.encoded_host() << "\n" + "port : " << u.port() << "\n" + "path : " << u.encoded_path() << "\n" + "query : " << u.encoded_query() << "\n" + "fragment : " << u.encoded_fragment() << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +url : https://user:pass@example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe+Jingleheimer%2DSchmidt#page%20anchor +scheme : https +authority : user:pass@example.com:443 +userinfo : user:pass +user : user +password : pass +host : example.com +port : 443 +path : /path/to/my%2dfile.txt +query : id=42&name=John%20Doe+Jingleheimer%2DSchmidt +fragment : page%20anchor +---- + + +|=== + + +== Percent-Encoding An instance of `decode_view` provides a number of functions to persist a decoded string: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_decoding_1] -// ][ -// [teletype] -// ``` -// id=42&name=John Doe Jingleheimer-Schmidt -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_decoding_1 +[source,cpp] +---- +decode_view dv("id=42&name=John%20Doe%20Jingleheimer%2DSchmidt"); +std::cout << dv << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +id=42&name=John Doe Jingleheimer-Schmidt +---- + + +|=== + `decode_view` and its decoding functions are designed to perform no memory allocations unless the algorithm where its being @@ -243,15 +338,32 @@ If `u2.host()` returned a value type, then two memory allocations would be necessary for this operation. 
Another common use case is converting URL path segments into filesystem paths: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_decoding_3] -// ][ -// [teletype] -// ``` -// path: "path/to/my-file.txt" -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_decoding_3 +[source,cpp] +---- +boost::filesystem::path p; +for (auto seg: u.segments()) + p.append(seg.begin(), seg.end()); +std::cout << "path: " << p << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +path: "path/to/my-file.txt" +---- + + +|=== + In this example, only the internal allocations of `filesystem::path` need to happen. In many common @@ -260,9 +372,21 @@ such as finding the appropriate route for a URL in a web server: +// snippet_decoding_4a [source,cpp] ---- -// snippet_decoding_4a +auto match = []( + std::vector<std::string> const& route, + url_view u) +{ + auto segs = u.segments(); + if (route.size() != segs.size()) + return false; + return std::equal( + route.begin(), + route.end(), + segs.begin()); +}; ---- @@ -270,14 +394,20 @@ This allows us to easily match files in the document root directory of a web server: +// snippet_decoding_4b [source,cpp] ---- -// snippet_decoding_4b +std::vector<std::string> route = + {"community", "reviews.html"}; +if (match(route, u)) +{ + handle_route(route, u); +} ---- // [#compound-elements] -=== Compound elements +== Compound elements The path and query parts of the URL are treated specially by the library. While they can be accessed as individual encoded strings, they can also be @@ -286,16 +416,34 @@ accessed through special view types. 
This code calls `encoded_segments` to obtain the path segments as a container that returns encoded strings: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_compound_elements_1] -// ][ -// ``` -// path -// to -// my-file.txt -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_compound_elements_1 +[source,cpp] +---- +segments_encoded_view segs = u.encoded_segments(); +for( auto v : segs ) +{ + std::cout << v << "\n"; +} +---- + +// Row 1, Column 2 +|[source] +---- +path +to +my-file.txt +---- + + +|=== + As with other `url_view` functions which return encoded strings, the encoded segments container does not allocate memory. Instead it returns views to the @@ -304,30 +452,68 @@ corresponding portions of the underlying encoded buffer referenced by the URL. As with other library functions, `decode_view` permits accessing elements of composed elements while avoiding memory allocations entirely: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_encoded_compound_elements_1] -// ][ -// [teletype] -// ``` -// path -// to -// my-file.txt -// ``` -// ]][[ -// [c++] -// [snippet_encoded_compound_elements_2] -// ][ -// [teletype] -// ``` -// key = id, value = 42 -// key = name, value = John Doe -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_encoded_compound_elements_1 +[source,cpp] +---- +segments_encoded_view segs = u.encoded_segments(); + +for( pct_string_view v : segs ) +{ + decode_view dv = *v; + std::cout << dv << "\n"; +} +---- + +// Row 1, Column 2 +| +[source] +---- +path +to +my-file.txt +---- + + +// Row 2, Column 1 +| +// snippet_encoded_compound_elements_2 +[source,cpp] +---- +params_encoded_view params_ref = u.encoded_params(); + +for( auto v : params_ref ) +{ + decode_view dk(v.key); + decode_view dv(v.value); + + std::cout << + "key = " << dk << + ", value = " << dv << "\n"; +} +---- + +// Row 2, Column 2 +| +[source] +---- +key = id, value = 42 +key = 
name, value = John Doe +---- + + +|=== -=== Modifying + +== Modifying The library provides the containers `url` and `static_url` which supporting modification of the URL contents. A `url` or `static_url` must be constructed @@ -338,9 +524,10 @@ character buffer, the `url` container uses the default allocator to control a resizable character buffer which it owns. +// snippet_quicklook_modifying_1 [source,cpp] ---- -// snippet_quicklook_modifying_1 +url u = parse_uri( s ).value(); ---- @@ -348,9 +535,10 @@ On the other hand, a `static_url` has fixed-capacity storage and does not require dynamic memory allocations. +// snippet_quicklook_modifying_1b [source,cpp] ---- -// snippet_quicklook_modifying_1b +static_url<1024> su = parse_uri( s ).value(); ---- @@ -361,17 +549,19 @@ constructible, and equality comparable. They support all the inspection function Changing the scheme is easy: +// snippet_quicklook_modifying_2 [source,cpp] ---- -// snippet_quicklook_modifying_2 +u.set_scheme( "https" ); ---- Or we can use a predefined constant: +// snippet_quicklook_modifying_3 [source,cpp] ---- -// snippet_quicklook_modifying_3 +u.set_scheme_id( scheme::https ); // equivalent to u.set_scheme( "https" ); ---- @@ -386,29 +576,62 @@ It is not possible for a `url` to hold syntactically illegal text. 
Modification functions return a reference to the object, so chaining is possible: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_quicklook_modifying_4] -// ][ -// [teletype] -// ``` -// https://192.168.0.1:8080/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_quicklook_modifying_4 +[source,cpp] +---- +u.set_host_ipv4( ipv4_address( "192.168.0.1" ) ) + .set_port_number( 8080 ) + .remove_userinfo(); +std::cout << u << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +https://192.168.0.1:8080/path/to/my%2dfile.txt?id=42&name=John%20Doe#page%20anchor +---- + + +|=== + All non-const operations offer the strong exception safety guarantee. The path segment and query parameter containers returned by a `url` offer modifiable range functionality, using member functions of the container: -// [table [[Code][Output]] [[ -// [c++] -// [snippet_quicklook_modifying_5] -// ][ -// [teletype] -// ``` -// https://192.168.0.1:8080/path/to/my%2dfile.txt?id=42&name=Vinnie%20Falco#page%20anchor -// ``` -// ]]] +[cols="a,a"] +|=== +// Headers +|Code|Output + +// Row 1, Column 1 +| +// snippet_quicklook_modifying_5 +[source,cpp] +---- +params_ref p = u.params(); +p.replace(p.find("name"), {"name", "Vinnie Falco"}); +std::cout << u << "\n"; +---- + +// Row 1, Column 2 +| +[source] +---- +https://192.168.0.1:8080/path/to/my%2dfile.txt?id=42&name=Vinnie%20Falco#page%20anchor +---- + + +|=== + diff --git a/doc/modules/ROOT/pages/urls/containers.adoc b/doc/modules/ROOT/pages/urls/containers.adoc index e64cbf09..e967374b 100644 --- a/doc/modules/ROOT/pages/urls/containers.adoc +++ b/doc/modules/ROOT/pages/urls/containers.adoc @@ -10,34 +10,37 @@ -== Containers += Containers Three containers are provided for interacting with URLs: -// [table Types [ -// [Name] -// [Description] -// ][ -// [__url__] -// [ -// A valid, modifiable URL which performs dynamic memory allocation -// to store 
the character buffer. -// ] -// ][ -// [__url_view__] -// [ -// A read-only reference to a character buffer containing a valid URL. -// The view does not retain ownership of the underlying character buffer; -// instead, it is managed by the caller. -// ] -// ][ -// [__static_url__] -// [ -// A valid, modifiable URL which stores the character buffer -// inside the class itself. This is a class template, where -// the maximum buffer size is a non-type template parameter. -// ] -// ]] +[cols="a,a"] +|=== +// Headers +|Name|Description + +// Row 1, Column 1 +|`url` +// Row 1, Column 2 +|A valid, modifiable URL which performs dynamic memory allocation + to store the character buffer. + +// Row 2, Column 1 +|`url_view` +// Row 2, Column 2 +|A read-only reference to a character buffer containing a valid URL. + The view does not retain ownership of the underlying character buffer; + instead, it is managed by the caller. + +// Row 3, Column 1 +|`static_url` +// Row 3, Column 2 +|A valid, modifiable URL which stores the character buffer + inside the class itself. This is a class template, where + the maximum buffer size is a non-type template parameter. + +|=== + Inheritance provides the observer and modifier public members; class @@ -49,7 +52,7 @@ Although the members are public, these base classes can only be constructed by the library as needed to support the implementation. The class hierarchy looks like this: -// [$url/images/ClassHierarchy.svg] +image::ClassHierarchy.svg[] Throughout this documentation and especially below, when an observer is discussed, it is applicable to all three derived containers @@ -60,9 +63,7 @@ The tables and exposition which follow describe the available observers and modifiers, along with notes relating important behaviors or special requirements. - - -=== Scheme +== Scheme The scheme is represented as a case-insensitive string, along with an enumeration constant which acts as a numeric indentifier when the @@ -73,62 +74,72 @@ a letter. 
These members are used to inspect and modify the scheme: -// [table Scheme Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_scheme `has_scheme`]] -// [`bool`] -// [ -// Return `true` if a scheme is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.scheme `scheme`]] -// [__string_view__] -// [ -// Return the scheme as a string, or the empty string if there is no scheme. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.scheme_id `scheme_id`]] -// [[link url.ref.boost__urls__scheme `scheme`]] -// [ -// Return the scheme as an enumerated constant, the value -// [link url.ref.boost__urls__scheme `scheme::unknown`] -// if the scheme is not one of the well-known schemes, or the value -// [link url.ref.boost__urls__scheme `scheme::none`] -// if there is no scheme. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description -// [table Scheme Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_scheme `set_scheme`]] -// [__string_view__] -// [ -// Set the scheme to a string. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_scheme_id `set_scheme_id`]] -// [[link url.ref.boost__urls__scheme `scheme`]] -// [ -// Set the scheme to a well-known scheme constant. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_scheme `remove_scheme`]] -// [] -// [ -// Remove the scheme if present. -// This includes the trailing colon (":"). -// ] -// ]] +// Row 1, Column 1 +|`has_scheme` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if a scheme is present. + +// Row 2, Column 1 +|`scheme` +// Row 2, Column 2 +|`string_view` +// Row 2, Column 3 +|Return the scheme as a string, or the empty string if there is no scheme. 
+ +// Row 3, Column 1 +|`scheme_id` +// Row 3, Column 2 +|`scheme` +// Row 3, Column 3 +|Return the scheme as an enumerated constant, the value + `scheme::unknown` + if the scheme is not one of the well-known schemes, or the value + `scheme::none` + if there is no scheme. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_scheme` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the scheme to a string. + +// Row 2, Column 1 +|`set_scheme_id` +// Row 2, Column 2 +|`scheme` +// Row 2, Column 3 +|Set the scheme to a well-known scheme constant. + +// Row 3, Column 1 +|`remove_scheme` +// Row 3, Column 2 +| +// Row 3, Column 3 +|Remove the scheme if present. + This includes the trailing colon (":"). + +|=== -=== Authority + +== Authority The authority is an optional part whose presence is indicated by an unescaped double slash ("//") immediately following the scheme, @@ -136,62 +147,71 @@ or at the beginning if the scheme is not present. It contains three components: an optional userinfo, the host, and an optional port. The authority in this diagram has all three components: -// [$url/images/AuthorityDiagram.svg] +image::AuthorityDiagram.svg[] An empty authority, corresponding to just a zero-length host component, is distinct from the absence of an authority. These members are used to inspect and modify the authority as a whole string: -// [table Authority Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_authority `has_authority`]] -// [`bool`] -// [ -// Return `true` if an authority is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.authority `authority`]] -// [__authority_view__] -// [ -// Return the authority as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_authority `encoded_authority`]] -// [__pct_string_view__] -// [ -// Return the authority as a read-only view. 
-// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description + +// Row 1, Column 1 +|`has_authority` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if an authority is present. + +// Row 2, Column 1 +|`authority` +// Row 2, Column 2 +|`authority_view` +// Row 2, Column 3 +|Return the authority as a decoded string. + +// Row 3, Column 1 +|`encoded_authority` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Return the authority as a read-only view. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_encoded_authority` +// Row 1, Column 2 +|`pct_string_view` +// Row 1, Column 3 +|Set the authority to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`remove_authority` +// Row 2, Column 2 +| +// Row 2, Column 3 +|Remove the authority if present. + This includes the leading double slash ("//"). + +|=== -// [table Authority Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_userinfo `set_encoded_authority`]] -// [__pct_string_view__] -// [ -// Set the authority to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_authority `remove_authority`]] -// [] -// [ -// Remove the authority if present. -// This includes the leading double slash ("//"). -// ] -// ]] The paragraphs and tables that follow describe how to interact with the individual parts of the authority. -=== Userinfo +== Userinfo An authority may have an optional userinfo, which consists of a user and optional password. The presence of the userinfo is indicated by an unescaped @@ -200,185 +220,218 @@ by an unescaped colon (":"). An empty password string is distinct from no password. 
This table shows various URLs with userinfos, and the corresponding user and password: -// [table Userinfo Examples [ -// [URL] -// [User] -// [Password] -// [Notes] -// ][ -// [`//user:pass@`] -// ["user"] -// ["pass"] -// [ -// User and password -// ] -// ][ -// [`//@`] -// [""] -// [] -// [ -// Empty user, no password -// ] -// ][ -// [`//user@`] -// ["user"] -// [] -// [ -// No password -// ] -// ][ -// [`//user:@`] -// ["user"] -// [""] -// [ -// Empty password -// ] -// ][ -// [`//:pass@`] -// [""] -// ["pass"] -// [ -// Empty user -// ] -// ][ -// [`//:@`] -// [""] -// [""] -// [ -// Empty user and password -// ] -// ]] +[cols="a,a,a,a"] +|=== +// Headers +|URL|User|Password|Notes -// [caution -// Although the specification allows the format username:password, -// the password component is deprecated and should be avoided if -// possible or otherwise used with care. It is not recommended to -// transfer password data through URLs unless it is an empty -// string indicating no password. -// ] +// Row 1, Column 1 +|`//user:pass@` +// Row 1, Column 2 +|"user" +// Row 1, Column 3 +|"pass" +// Row 1, Column 4 +|User and password + +// Row 2, Column 1 +|`//@` +// Row 2, Column 2 +|"" +// Row 2, Column 3 +| +// Row 2, Column 4 +|Empty user, no password + +// Row 3, Column 1 +|`//user@` +// Row 3, Column 2 +|"user" +// Row 3, Column 3 +| +// Row 3, Column 4 +|No password + +// Row 4, Column 1 +|`//user:@` +// Row 4, Column 2 +|"user" +// Row 4, Column 3 +|"" +// Row 4, Column 4 +|Empty password + +// Row 5, Column 1 +|`//:pass@` +// Row 5, Column 2 +|"" +// Row 5, Column 3 +|"pass" +// Row 5, Column 4 +|Empty user + +// Row 6, Column 1 +|`//:@` +// Row 6, Column 2 +|"" +// Row 6, Column 3 +|"" +// Row 6, Column 4 +|Empty user and password + +|=== + +[CAUTION] +==== +Although the specification allows the format username:password, +the password component is deprecated and should be avoided if +possible or otherwise used with care. 
It is not recommended to +transfer password data through URLs unless it is an empty +string indicating no password. +==== These members are used to inspect and modify the userinfo: -// [table Userinfo Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_userinfo `has_userinfo`]] -// [`bool`] -// [ -// Return `true` if a userinfo is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_password `has_password`]] -// [`bool`] -// [ -// Return `true` if a password is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.user `user`]] -// [__std_string__] -// [ -// Return the user as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.password `password`]] -// [__std_string__] -// [ -// Return the password as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.userinfo `userinfo`]] -// [__std_string__] -// [ -// Return the userinfo as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_user `encoded_user`]] -// [__pct_string_view__] -// [ -// Return the user. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_password `encoded_password`]] -// [__pct_string_view__] -// [ -// Return the password, or an empty string if no password is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_userinfo `encoded_userinfo`]] -// [__pct_string_view__] -// [ -// Return the userinfo. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description -// [table Userinfo Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_user `set_user`]] -// [__string_view__] -// [ -// Set the user to the string. -// Reserved characters are percent-escaped automatically. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_user `set_password`]] -// [__string_view__] -// [ -// Set the password to the string. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_userinfo `set_userinfo`]] -// [__string_view__] -// [ -// Set the userinfo to the string. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_user `set_encoded_user`]] -// [__pct_string_view__] -// [ -// Set the user to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_password `set_encoded_password`]] -// [__pct_string_view__] -// [ -// Set the password to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_userinfo `set_encoded_userinfo`]] -// [__pct_string_view__] -// [ -// Set the userinfo to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_password `remove_password`]] -// [] -// [ -// Remove the password if present. -// This includes the password separator colon (":"). -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_userinfo `remove_userinfo`]] -// [] -// [ -// Remove the userinfo if present. -// This includes the user and password separator colon (":") -// and the trailing at sign ("@"). -// ] -// ]] +// Row 1, Column 1 +|`has_userinfo` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if a userinfo is present. + +// Row 2, Column 1 +|`has_password` +// Row 2, Column 2 +|`bool` +// Row 2, Column 3 +|Return `true` if a password is present. 
+ +// Row 3, Column 1 +|`user` +// Row 3, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 3, Column 3 +|Return the user as a decoded string. + +// Row 4, Column 1 +|`password` +// Row 4, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 4, Column 3 +|Return the password as a decoded string. + +// Row 5, Column 1 +|`userinfo` +// Row 5, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 5, Column 3 +|Return the userinfo as a decoded string. + +// Row 6, Column 1 +|`encoded_user` +// Row 6, Column 2 +|`pct_string_view` +// Row 6, Column 3 +|Return the user. + +// Row 7, Column 1 +|`encoded_password` +// Row 7, Column 2 +|`pct_string_view` +// Row 7, Column 3 +|Return the password, or an empty string if no password is present. + +// Row 8, Column 1 +|`encoded_userinfo` +// Row 8, Column 2 +|`pct_string_view` +// Row 8, Column 3 +|Return the userinfo. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_user` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the user to the string. + Reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`set_password` +// Row 2, Column 2 +|`string_view` +// Row 2, Column 3 +|Set the password to the string. + Reserved characters are percent-escaped automatically. + +// Row 3, Column 1 +|`set_userinfo` +// Row 3, Column 2 +|`string_view` +// Row 3, Column 3 +|Set the userinfo to the string. + Reserved characters are percent-escaped automatically. + +// Row 4, Column 1 +|`set_encoded_user` +// Row 4, Column 2 +|`pct_string_view` +// Row 4, Column 3 +|Set the user to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. 
+ +// Row 5, Column 1 +|`set_encoded_password` +// Row 5, Column 2 +|`pct_string_view` +// Row 5, Column 3 +|Set the password to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 6, Column 1 +|`set_encoded_userinfo` +// Row 6, Column 2 +|`pct_string_view` +// Row 6, Column 3 +|Set the userinfo to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 7, Column 1 +|`remove_password` +// Row 7, Column 2 +| +// Row 7, Column 3 +|Remove the password if present. + This includes the password separator colon (":"). + +// Row 8, Column 1 +|`remove_userinfo` +// Row 8, Column 2 +| +// Row 8, Column 3 +|Remove the userinfo if present. + This includes the user and password separator colon (":") + and the trailing at sign ("@"). + +|=== -=== Host + +== Host The host portion of the authority is a string which can be a host name, an IPv4 address, an IPv6 address, or an IPvFuture address depending on @@ -387,210 +440,233 @@ even if the resulting host string would be zero length. These members are used to inspect and modify the host: -// [table Host Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_type `host_type`]] -// [[link url.ref.boost__urls__host_type `host_type`]] -// [ -// Return the host type enumeration constant. -// If there is no authority, this is the value -// [link url.ref.boost__urls__host_type `host_type::none`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host `host`]] -// [__std_string__] -// [ -// Return the host as a decoded string, or an empty string if there is -// no authority. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_address `host_address`]] -// [__std_string__] -// [ -// Return the host as a decoded string. 
-// If the host type is -// [link url.ref.boost__urls__host_type `host_type::ipv6`] or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`], -// the enclosing brackets are removed. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_name `host_name`]] -// [__std_string__] -// [ -// Return the host name as a decoded string, or the empty string if -// the host type is not -// [link url.ref.boost__urls__host_type `host_type::name`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_ipv4_address `host_ipv4_address`]] -// [__ipv4_address__] -// [ -// Return the host as an __ipv4_address__. -// If the host type is not -// [link url.ref.boost__urls__host_type `host_type::ipv4`], -// a default-constructed value is returned. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_ipv6_address `host_ipv6_address`]] -// [__ipv6_address__] -// [ -// Return the host as an __ipv6_address__. -// If the host type is not -// [link url.ref.boost__urls__host_type `host_type::ipv6`], -// a default-constructed value is returned. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.host_ipvfuture `host_ipvfuture`]] -// [__string_view__] -// [ -// Return the host as a string without enclosing brackets if -// the host type is -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`], -// otherwise return an empty string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_host `encoded_host`]] -// [__pct_string_view__] -// [ -// Return the host, or an empty string if there is no authority. -// This includes enclosing brackets if the host type is -// [link url.ref.boost__urls__host_type `host_type::ipv6`] or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_host_address `encoded_host_address`]] -// [__pct_string_view__] -// [ -// Return the host. 
-// If the host type is -// [link url.ref.boost__urls__host_type `host_type::ipv6`] or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`], -// the enclosing brackets are removed. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_host_name `encoded_host_name`]] -// [__pct_string_view__] -// [ -// Return the host name as a string. If the host type is not -// [link url.ref.boost__urls__host_type `host_type::name`], -// an empty string is returned. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description -// [table Host Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_host `set_host`]] -// [__string_view__] -// [ -// Set the host to the string, depending on the contents. If -// the string is a valid IPv4 address, a valid IPv6 address -// enclosed in brackets, or a valid IPvFuture address enclosed -// in brackets then the resulting host type is -// [link url.ref.boost__urls__host_type `host_type::ipv4`], -// [link url.ref.boost__urls__host_type `host_type::ipv6`], or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`] -// respectively. Otherwise, the host type is -// [link url.ref.boost__urls__host_type `host_type::name`], and -// any reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_host_address `set_host_address`]] -// [__string_view__] -// [ -// Set the host to the string, depending on the contents. If -// the string is a valid IPv4 address, a valid IPv6 address, or -// a valid IPvFuture address then the resulting host type is -// [link url.ref.boost__urls__host_type `host_type::ipv4`], -// [link url.ref.boost__urls__host_type `host_type::ipv6`], or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`] -// respectively. 
Otherwise, the host type is -// [link url.ref.boost__urls__host_type `host_type::name`], and -// any reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_host_ipv4 `set_host_ipv4`]] -// [__ipv4_address__] -// [ -// Set the host to the IPv4 address. The host type is -// [link url.ref.boost__urls__host_type `host_type::ipv4`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_host_ipv6 `set_host_ipv6`]] -// [__ipv6_address__] -// [ -// Set the host to the IPv6 address. The host type is -// [link url.ref.boost__urls__host_type `host_type::ipv6`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_host_ipvfuture `set_host_ipvfuture`]] -// [__string_view__] -// [ -// Set the host to the IPvFuture address, which should not include -// square brackets. The host type is -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`]. -// If the string is not a valid IPvFuture address, an exception -// is thrown. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_host_name `set_host_name`]] -// [__string_view__] -// [ -// Set the host to the string. -// Any reserved characters are percent-escaped automatically. -// The host type is -// [link url.ref.boost__urls__host_type `host_type::name`]. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_host `set_encoded_host`]] -// [__pct_string_view__] -// [ -// Set the host to the string, depending on the contents. If -// the string is a valid IPv4 address, a valid IPv6 address -// enclosed in brackets, or a valid IPvFuture address enclosed -// in brackets then the resulting host type is -// [link url.ref.boost__urls__host_type `host_type::ipv4`], -// [link url.ref.boost__urls__host_type `host_type::ipv6`], or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`] -// respectively. 
Otherwise, the host type is -// [link url.ref.boost__urls__host_type `host_type::name`], the -// string may contain percent escapes, and any reserved characters -// are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_host_address `set_encoded_host_address`]] -// [__pct_string_view__] -// [ -// Set the host to the string, depending on the contents. If -// the string is a valid IPv4 address, a valid IPv6 address, or -// a valid IPvFuture address then the resulting host type is -// [link url.ref.boost__urls__host_type `host_type::ipv4`], -// [link url.ref.boost__urls__host_type `host_type::ipv6`], or -// [link url.ref.boost__urls__host_type `host_type::ipvfuture`] -// respectively. Otherwise, the host type is -// [link url.ref.boost__urls__host_type `host_type::name`], the -// string may contain percent escapes, and and -// any reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_host_name `set_encoded_host_name`]] -// [__pct_string_view__] -// [ -// Set the host to the string, which may contain percent escapes. -// Any reserved characters are percent-escaped automatically. -// The host type is -// [link url.ref.boost__urls__host_type `host_type::name`]. -// ] -// ]] +// Row 1, Column 1 +|`host_type` +// Row 1, Column 2 +|`host_type` +// Row 1, Column 3 +|Return the host type enumeration constant. + If there is no authority, this is the value + `host_type::none`. + +// Row 2, Column 1 +|`host` +// Row 2, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 2, Column 3 +|Return the host as a decoded string, or an empty string if there is + no authority. + +// Row 3, Column 1 +|`host_address` +// Row 3, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 3, Column 3 +|Return the host as a decoded string. 
+ If the host type is + `host_type::ipv6` or + `host_type::ipvfuture`, + the enclosing brackets are removed. + +// Row 4, Column 1 +|`host_name` +// Row 4, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 4, Column 3 +|Return the host name as a decoded string, or the empty string if + the host type is not + `host_type::name`. + +// Row 5, Column 1 +|`host_ipv4_address` +// Row 5, Column 2 +|`ipv4_address` +// Row 5, Column 3 +|Return the host as an `ipv4_address`. + If the host type is not + `host_type::ipv4`, + a default-constructed value is returned. + +// Row 6, Column 1 +|`host_ipv6_address` +// Row 6, Column 2 +|`ipv6_address` +// Row 6, Column 3 +|Return the host as an `ipv6_address`. + If the host type is not + `host_type::ipv6`, + a default-constructed value is returned. + +// Row 7, Column 1 +|`host_ipvfuture` +// Row 7, Column 2 +|`string_view` +// Row 7, Column 3 +|Return the host as a string without enclosing brackets if + the host type is + `host_type::ipvfuture`, + otherwise return an empty string. + +// Row 8, Column 1 +|`encoded_host` +// Row 8, Column 2 +|`pct_string_view` +// Row 8, Column 3 +|Return the host, or an empty string if there is no authority. + This includes enclosing brackets if the host type is + `host_type::ipv6` or + `host_type::ipvfuture`. + +// Row 9, Column 1 +|`encoded_host_address` +// Row 9, Column 2 +|`pct_string_view` +// Row 9, Column 3 +|Return the host. + If the host type is + `host_type::ipv6` or + `host_type::ipvfuture`, + the enclosing brackets are removed. + +// Row 10, Column 1 +|`encoded_host_name` +// Row 10, Column 2 +|`pct_string_view` +// Row 10, Column 3 +|Return the host name as a string. If the host type is not + `host_type::name`, + an empty string is returned. 
+ +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_host` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the host to the string, depending on the contents. If + the string is a valid IPv4 address, a valid IPv6 address + enclosed in brackets, or a valid IPvFuture address enclosed + in brackets then the resulting host type is + `host_type::ipv4`, + `host_type::ipv6`, or + `host_type::ipvfuture` + respectively. Otherwise, the host type is + `host_type::name`, and + any reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`set_host_address` +// Row 2, Column 2 +|`string_view` +// Row 2, Column 3 +|Set the host to the string, depending on the contents. If + the string is a valid IPv4 address, a valid IPv6 address, or + a valid IPvFuture address then the resulting host type is + `host_type::ipv4`, + `host_type::ipv6`, or + `host_type::ipvfuture` + respectively. Otherwise, the host type is + `host_type::name`, and + any reserved characters are percent-escaped automatically. + +// Row 3, Column 1 +|`set_host_ipv4` +// Row 3, Column 2 +|`ipv4_address` +// Row 3, Column 3 +|Set the host to the IPv4 address. The host type is + `host_type::ipv4`. + +// Row 4, Column 1 +|`set_host_ipv6` +// Row 4, Column 2 +|`ipv6_address` +// Row 4, Column 3 +|Set the host to the IPv6 address. The host type is + `host_type::ipv6`. + +// Row 5, Column 1 +|`set_host_ipvfuture` +// Row 5, Column 2 +|`string_view` +// Row 5, Column 3 +|Set the host to the IPvFuture address, which should not include + square brackets. The host type is + `host_type::ipvfuture`. + If the string is not a valid IPvFuture address, an exception + is thrown. + +// Row 6, Column 1 +|`set_host_name` +// Row 6, Column 2 +|`string_view` +// Row 6, Column 3 +|Set the host to the string. + Any reserved characters are percent-escaped automatically. + The host type is + `host_type::name`. 
+ +// Row 7, Column 1 +|`set_encoded_host` +// Row 7, Column 2 +|`pct_string_view` +// Row 7, Column 3 +|Set the host to the string, depending on the contents. If + the string is a valid IPv4 address, a valid IPv6 address + enclosed in brackets, or a valid IPvFuture address enclosed + in brackets then the resulting host type is + `host_type::ipv4`, + `host_type::ipv6`, or + `host_type::ipvfuture` + respectively. Otherwise, the host type is + `host_type::name`, the + string may contain percent escapes, and any reserved characters + are percent-escaped automatically. + +// Row 8, Column 1 +|`set_encoded_host_address` +// Row 8, Column 2 +|`pct_string_view` +// Row 8, Column 3 +|Set the host to the string, depending on the contents. If + the string is a valid IPv4 address, a valid IPv6 address, or + a valid IPvFuture address then the resulting host type is + `host_type::ipv4`, + `host_type::ipv6`, or + `host_type::ipvfuture` + respectively. Otherwise, the host type is + `host_type::name`, the + string may contain percent escapes, and + any reserved characters are percent-escaped automatically. + +// Row 9, Column 1 +|`set_encoded_host_name` +// Row 9, Column 2 +|`pct_string_view` +// Row 9, Column 3 +|Set the host to the string, which may contain percent escapes. + Any reserved characters are percent-escaped automatically. + The host type is + `host_type::name`. + +|=== -=== Port + +== Port The port is a string of digits, possibly of zero length. The presence of a port is indicated by a colon prefix (":") appearing after the host and @@ -601,61 +677,71 @@ of the integer, then it is mapped to the number zero. These members are used to inspect and modify the port: -// [table Port Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_port `has_port`]] -// [`bool`] -// [ -// Return `true` if a port is present. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.port `port`]] -// [__string_view__] -// [ -// Return the port as a string, or an empty string if there is no port. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.port_number `port_number`]] -// [`std::uint16_t`] -// [ -// Return the port as an unsigned integer. If the number would be -// greater than 65535, then zero is returned. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description -// [table Port Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_port `set_port`]] -// [__string_view__] -// [ -// Set the port to a string. -// If the string contains any character which is not a digit, -// an exception is thrown. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_port_number `set_port_number`]] -// [`std::uint16_t`] -// [ -// Set the port to a number. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_port `remove_port`]] -// [] -// [ -// Remove the port if present. -// This does not remove the authority. -// ] -// ]] +// Row 1, Column 1 +|`has_port` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if a port is present. + +// Row 2, Column 1 +|`port` +// Row 2, Column 2 +|`string_view` +// Row 2, Column 3 +|Return the port as a string, or an empty string if there is no port. + +// Row 3, Column 1 +|`port_number` +// Row 3, Column 2 +|`std::uint16_t` +// Row 3, Column 3 +|Return the port as an unsigned integer. If the number would be + greater than 65535, then zero is returned. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_port` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the port to a string. + If the string contains any character which is not a digit, + an exception is thrown. 
+ +// Row 2, Column 1 +|`set_port_number` +// Row 2, Column 2 +|`std::uint16_t` +// Row 2, Column 3 +|Set the port to a number. + +// Row 3, Column 1 +|`remove_port` +// Row 3, Column 2 +| +// Row 3, Column 3 +|Remove the port if present. + This does not remove the authority. + +|=== -=== Path + +== Path Depending on the scheme, the path may be treated as a string, or as a hierarchically structured sequence of segments delimited @@ -664,79 +750,93 @@ for every URL, even if it is the empty string. These members are used to inspect and modify the path: -// [table Path Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.is_path_absolute `is_path_absolute`]] -// [`bool`] -// [ -// Return `true` if the path starts with a forward slash ("/"). -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.path `path`]] -// [__std_string__] -// [ -// Return the path as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_path `encoded_path`]] -// [__pct_string_view__] -// [ -// Return the path. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.segments `segments`]] -// [__segments_view__] -// [ -// Return the path as a range of decoded segments. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_segments `encoded_segments`]] -// [__segments_encoded_view__] -// [ -// Return the path as a range of segments. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description + +// Row 1, Column 1 +|`is_path_absolute` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if the path starts with a forward slash ("/"). + +// Row 2, Column 1 +|`path` +// Row 2, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 2, Column 3 +|Return the path as a decoded string. + +// Row 3, Column 1 +|`encoded_path` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Return the path. 
+ +// Row 4, Column 1 +|`segments` +// Row 4, Column 2 +|`segments_view` +// Row 4, Column 3 +|Return the path as a range of decoded segments. + +// Row 5, Column 1 +|`encoded_segments` +// Row 5, Column 2 +|`segments_encoded_view` +// Row 5, Column 3 +|Return the path as a range of segments. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_path` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the path to the string. + Reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`set_path_absolute` +// Row 2, Column 2 +|`bool` +// Row 2, Column 3 +|Set whether the path is absolute. + +// Row 3, Column 1 +|`set_encoded_path` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Set the path to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 4, Column 1 +|`segments` +// Row 4, Column 2 +|`segments_ref` +// Row 4, Column 3 +|Return the path as a modifiable range of decoded segments. + +// Row 5, Column 1 +|`encoded_segments` +// Row 5, Column 2 +|`segments_encoded_ref` +// Row 5, Column 3 +|Return the path as a modifiable range of segments. + +|=== -// [table Path Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_path `set_path`]] -// [__string_view__] -// [ -// Set the path to the string. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_path_absolute `set_path_absolute`]] -// [`bool`] -// [ -// Set whether the path is absolute. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_path `set_encoded_path`]] -// [__pct_string_view__] -// [ -// Set the path to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_base.segments `segments`]] -// [__segments_ref__] -// [ -// Return the path as a modifiable range of decoded segments. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.encoded_segments `encoded_segments`]] -// [__segments_encoded_ref__] -// [ -// Return the path as a modifiable range of segments. -// ] -// ]] The segments-based containers `segments_view`, `segments_ref`, @@ -745,7 +845,7 @@ are discussed in a later section. -=== Query +== Query Depending on the scheme, the query may be treated as a string, or as a structured series of key-value pairs (called "params") @@ -754,80 +854,94 @@ an empty query string is distinct from no query. These members are used to inspect and modify the query: -// [table Query Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_query `has_query`]] -// [`bool`] -// [ -// Return `true` if a query is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.query `query`]] -// [__std_string__] -// [ -// Return the query as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_query `encoded_query`]] -// [__pct_string_view__] -// [ -// Return the query. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.params `params`]] -// [__params_view__] -// [ -// Return the query as a read-only range of decoded params. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_params `encoded_params`]] -// [__params_encoded_view__] -// [ -// Return the query as a read-only range of params. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description + +// Row 1, Column 1 +|`has_query` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if a query is present. 
+ +// Row 2, Column 1 +|`query` +// Row 2, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 2, Column 3 +|Return the query as a decoded string. + +// Row 3, Column 1 +|`encoded_query` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Return the query. + +// Row 4, Column 1 +|`params` +// Row 4, Column 2 +|`params_view` +// Row 4, Column 3 +|Return the query as a read-only range of decoded params. + +// Row 5, Column 1 +|`encoded_params` +// Row 5, Column 2 +|`params_encoded_view` +// Row 5, Column 3 +|Return the query as a read-only range of params. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_query` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the query to a string. + Reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`set_encoded_query` +// Row 2, Column 2 +|`pct_string_view` +// Row 2, Column 3 +|Set the query to a string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 3, Column 1 +|`params` +// Row 3, Column 2 +|`params_ref` +// Row 3, Column 3 +|Return the query as a modifiable range of decoded params. + +// Row 4, Column 1 +|`encoded_params` +// Row 4, Column 2 +|`params_encoded_ref` +// Row 4, Column 3 +|Return the query as a modifiable range of params. + +// Row 5, Column 1 +|`remove_query` +// Row 5, Column 2 +| +// Row 5, Column 3 +|Remove the query. + This also removes the leading question mark ("?") if present. + +|=== -// [table Query Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_query `set_query`]] -// [__string_view__] -// [ -// Set the query to a string. -// Reserved characters are percent-escaped automatically. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_query `set_encoded_query`]] -// [__pct_string_view__] -// [ -// Set the query to a string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.params `params`]] -// [__params_ref__] -// [ -// Return the query as a modifiable range of decoded params. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.encoded_params `encoded_params`]] -// [__params_encoded_ref__] -// [ -// Return the query as a modifiable range of params. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_query `remove_query`]] -// [] -// [ -// Remove the query. -// This also removes the leading question mark ("?") if present. -// ] -// ]] The params-based containers `params_view`, `params_ref`, @@ -836,111 +950,130 @@ are discussed in a later section. -=== Fragment +== Fragment The fragment is treated as a string; there is no common, structured interpretation of the contents. These members are used to inspect and modify the fragment: -// [table Fragment Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.has_fragment `has_fragment`]] -// [`bool`] -// [ -// Return `true` if a fragment is present. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.fragment `fragment`]] -// [__std_string__] -// [ -// Return the fragment as a decoded string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.fragment `encoded_fragment`]] -// [__pct_string_view__] -// [ -// Return the fragment. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description -// [table Fragment Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.set_query `set_fragment`]] -// [__string_view__] -// [ -// Set the fragment to the string. -// Reserved characters are percent-escaped automatically. 
-// ] -// ][ -// [[link url.ref.boost__urls__url_base.set_encoded_fragment `set_encoded_fragment`]] -// [__pct_string_view__] -// [ -// Set the fragment to the string, which may contain percent escapes. -// Reserved characters are percent-escaped automatically. -// ] -// ][ -// [[link url.ref.boost__urls__url_base.remove_fragment `remove_fragment`]] -// [] -// [ -// Remove the fragment. -// This also removes the leading pound sign ("#") if present. -// ] -// ]] +// Row 1, Column 1 +|`has_fragment` +// Row 1, Column 2 +|`bool` +// Row 1, Column 3 +|Return `true` if a fragment is present. + +// Row 2, Column 1 +|`fragment` +// Row 2, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 2, Column 3 +|Return the fragment as a decoded string. + +// Row 3, Column 1 +|`encoded_fragment` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Return the fragment. + +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`set_fragment` +// Row 1, Column 2 +|`string_view` +// Row 1, Column 3 +|Set the fragment to the string. + Reserved characters are percent-escaped automatically. + +// Row 2, Column 1 +|`set_encoded_fragment` +// Row 2, Column 2 +|`pct_string_view` +// Row 2, Column 3 +|Set the fragment to the string, which may contain percent escapes. + Reserved characters are percent-escaped automatically. + +// Row 3, Column 1 +|`remove_fragment` +// Row 3, Column 2 +| +// Row 3, Column 3 +|Remove the fragment. + This also removes the leading pound sign ("#") if present. 
+ +|=== -=== Compound Fields + +== Compound Fields For convenience, these observers and modifiers for aggregated subsets of the URL are provided: -// [table Compound Field Observers [ -// [Function] -// [Return Type] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_host_and_port `encoded_host_and_port`]] -// [__pct_string_view__] -// [ -// Return the host and port as a string with percent escapes. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_origin `encoded_origin`]] -// [__pct_string_view__] -// [ -// Return only the scheme and authority parts as an individual string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_resource `encoded_resource`]] -// [__pct_string_view__] -// [ -// Return only the path, query, and fragment parts as an individual string. -// ] -// ][ -// [[link url.ref.boost__urls__url_view_base.encoded_target `encoded_target`]] -// [__pct_string_view__] -// [ -// Return only the path and query parts as an individual string. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Function|Return Type|Description + +// Row 1, Column 1 +|`encoded_host_and_port` +// Row 1, Column 2 +|`pct_string_view` +// Row 1, Column 3 +|Return the host and port as a string with percent escapes. + +// Row 2, Column 1 +|`encoded_origin` +// Row 2, Column 2 +|`pct_string_view` +// Row 2, Column 3 +|Return only the scheme and authority parts as an individual string. + +// Row 3, Column 1 +|`encoded_resource` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|Return only the path, query, and fragment parts as an individual string. + +// Row 4, Column 1 +|`encoded_target` +// Row 4, Column 2 +|`pct_string_view` +// Row 4, Column 3 +|Return only the path and query parts as an individual string. 
+ +|=== + + +[cols="a,a,a"] +|=== +// Headers +|Function|Parameters|Description + +// Row 1, Column 1 +|`remove_origin` +// Row 1, Column 2 +| +// Row 1, Column 3 +|Remove the scheme and authority parts from the URL. + +|=== -// [table Compound Field Modifiers [ -// [Function] -// [Parameters] -// [Description] -// ][ -// [[link url.ref.boost__urls__url_base.remove_origin `remove_origin`]] -// [] -// [ -// Remove the scheme and authority parts from the URL. -// ] -// ]] diff --git a/doc/modules/ROOT/pages/urls/formatting.adoc b/doc/modules/ROOT/pages/urls/formatting.adoc index eb60ef66..dd4f2768 100644 --- a/doc/modules/ROOT/pages/urls/formatting.adoc +++ b/doc/modules/ROOT/pages/urls/formatting.adoc @@ -8,21 +8,23 @@ // -== Formatting += Formatting Algorithms to format URLs construct a mutable URL by parsing and applying arguments to a URL template. The following example uses the `format` function to construct an absolute URL: +// snippet_format_1 [source,cpp] ---- -// snippet_format_1 +url u = format("{}://{}:{}/rfc/{}", "https", "www.ietf.org", 80, "rfc2396.txt"); +assert(u.buffer() == "https://www.ietf.org:80/rfc/rfc2396.txt"); ---- The rules for a format URL string are the same -as for a `std::format_string`, where replacement +as for a https://en.cppreference.com/w/cpp/utility/format/basic_format_string[`std::format_string`,window=blank_], where replacement fields are delimited by curly braces. The URL type is inferred from the format string. 
@@ -32,9 +34,11 @@ applied and any invalid characters for that formatted argument are percent-escaped: +// snippet_format_2 [source,cpp] ---- -// snippet_format_2 +url u = format("https://{}/{}", "www.boost.org", "Hello world!"); +assert(u.buffer() == "https://www.boost.org/Hello%20world!"); ---- @@ -45,16 +49,25 @@ characters are normalized to ensure the URL is valid: -[source,cpp] ----- // snippet_format_3a ----- - - - [source,cpp] ---- +url u = format("{}:{}", "mailto", "someone@example.com"); +assert(u.buffer() == "mailto:someone@example.com"); +assert(u.scheme() == "mailto"); +assert(u.path() == "someone@example.com"); +---- + + + // snippet_format_3b +[source,cpp] +---- +url u = format("{}{}", "mailto:", "someone@example.com"); +assert(u.buffer() == "mailto%3Asomeone@example.com"); +assert(!u.has_scheme()); +assert(u.path() == "mailto:someone@example.com"); +assert(u.encoded_path() == "mailto%3Asomeone@example.com"); ---- @@ -62,19 +75,24 @@ The function `format_to` can be used to format URLs into any modifiable URL container. +// snippet_format_4 [source,cpp] ---- -// snippet_format_4 +static_url<50> u; +format_to(u, "{}://{}:{}/rfc/{}", "https", "www.ietf.org", 80, "rfc2396.txt"); +assert(u.buffer() == "https://www.ietf.org:80/rfc/rfc2396.txt"); ---- -As with `std::format`, positional and named arguments are +As with https://en.cppreference.com/w/cpp/utility/format/format[`std::format`,window=blank_], positional and named arguments are supported. 
+// snippet_format_5a [source,cpp] ---- -// snippet_format_5a +url u = format("{0}://{2}:{1}/{3}{4}{3}", "https", 80, "www.ietf.org", "abra", "cad"); +assert(u.buffer() == "https://www.ietf.org:80/abracadabra"); ---- @@ -82,21 +100,26 @@ The `arg` function can be used to associate names with arguments: +// snippet_format_5b [source,cpp] ---- -// snippet_format_5b +url u = format("https://example.com/~{username}", arg("username", "mark")); +assert(u.buffer() == "https://example.com/~mark"); ---- -A second overload based on `std::initializer_list` +A second overload based on https://en.cppreference.com/w/cpp/utility/initializer_list[`std::initializer_list`,window=blank_] is provided for both `format` and `format_to`. These overloads can help with lists of named arguments: +// snippet_format_5c [source,cpp] ---- -// snippet_format_5c +boost::core::string_view fmt = "{scheme}://{host}:{port}/{dir}/{file}"; +url u = format(fmt, {{"scheme", "https"}, {"port", 80}, {"host", "example.com"}, {"dir", "path/to"}, {"file", "file.txt"}}); +assert(u.buffer() == "https://example.com:80/path/to/file.txt"); ---- diff --git a/doc/modules/ROOT/pages/urls/index.adoc b/doc/modules/ROOT/pages/urls/index.adoc index 83298c70..6dd86a6e 100644 --- a/doc/modules/ROOT/pages/urls/index.adoc +++ b/doc/modules/ROOT/pages/urls/index.adoc @@ -8,17 +8,17 @@ // -== URLs += URLs A URL, short for "Uniform Resource Locator," is a compact string of characters identifying an abstract or physical resource. It has these five parts, with may be optional or disallowed depending on the context: -// [$url/images/PartsDiagram.svg] +image:PartsDiagram.svg[] Each part's syntax is defined by a set of production rules in -`rfc3986`. All valid URLs conform to this grammar, also called +https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]. All valid URLs conform to this grammar, also called the "generic syntax." 
Here is an example URL which describes a file and its location on a network host: @@ -31,40 +31,55 @@ https://www.example.com/path/to/file.txt?userid=1001&pages=3&results=full#page1 The parts and their corresponding text is as follows: -// [table Example Parts [ -// [Part] -// [Text] -// ][ -// [[link url.urls.containers.scheme ['scheme]]] -// ["https"] -// ][ -// [[link url.urls.containers.authority ['authority]]] -// ["www.example.com"] -// ][ -// [[link url.urls.containers.path ['path]]] -// ["/path/to/file.txt"] -// ][ -// [[link url.urls.containers.query ['query]]] -// ["userid=1001&pages=3&results=full"] -// ][ -// [[link url.urls.containers.fragment ['fragment]]] -// ["page1"] -// ]] +[cols="a,a"] +|=== +// Headers +|Part|Text + +// Row 1, Column 1 +|__scheme__ +// Row 1, Column 2 +|"https" + +// Row 2, Column 1 +|__authority__ +// Row 2, Column 2 +|"www.example.com" + +// Row 3, Column 1 +|__path__ +// Row 3, Column 2 +|"/path/to/file.txt" + +// Row 4, Column 1 +|__query__ +// Row 4, Column 2 +|"userid=1001&pages=3&results=full" + +// Row 5, Column 1 +|__fragment__ +// Row 5, Column 2 +|"page1" + +|=== + The production rule for the example above is called a __URI__, which can contain all five parts. The specification using -https://datatracker.ietf.org/doc/html/rfc2234[__ABNF__,window=blank_] +https://datatracker.ietf.org/doc/html/rfc2234[__ABNF notation__,window=blank_] is: -``` -URI = scheme ":" hier-part // [ "?" query ] // [ "#" fragment ] +[source] +---- +URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty -``` +---- + In this notation, the square brackets ("\// [" and "\]") denote optional elements, quoted text represents character literals, and slashes are @@ -80,7 +95,7 @@ __URI-reference__. These are discussed in greater depth later. 
-=== Scheme +== Scheme The most important part is the __scheme__, whose production rule is: @@ -98,28 +113,44 @@ https://en.wikipedia.org/wiki/Internet_Assigned_Numbers_Authority[Internet Assig Here are some registered schemes and their corresponding specifications: -// [table Public Schemes [ -// [Scheme] -// [Specification] -// ][ -// [[*http]] -// [[@https://datatracker.ietf.org/doc/html/rfc7230#section-2.7.1 http URI Scheme (rfc7230)]] -// ][ -// [[*magnet]] -// [[@https://en.wikipedia.org/wiki/Magnet_URI_scheme Magnet URI scheme]] -// ][ -// [[*mailto]] -// [[@https://datatracker.ietf.org/doc/html/rfc6068 The 'mailto' URI Scheme (rfc6068)]] -// ][ -// [[*payto]] -// [[@https://datatracker.ietf.org/doc/html/rfc8905 The 'payto' URI Scheme for Payments (rfc8905)]] -// ][ -// [[*telnet]] -// [[@https://datatracker.ietf.org/doc/html/rfc4248 The telnet URI Scheme (rfc4248)]] -// ][ -// [[*urn]] -// [[@https://datatracker.ietf.org/doc/html/rfc2141 URN Syntax]] -// ]] +[cols="a,a"] +|=== +// Headers +|Scheme|Specification + +// Row 1, Column 1 +|**http** +// Row 1, Column 2 +|https://datatracker.ietf.org/doc/html/rfc7230#section-2.7.1[http URI Scheme (rfc7230),window=blank_] + +// Row 2, Column 1 +|**magnet** +// Row 2, Column 2 +|https://en.wikipedia.org/wiki/Magnet_URI_scheme[Magnet URI scheme,window=blank_] + +// Row 3, Column 1 +|**mailto** +// Row 3, Column 2 +|https://datatracker.ietf.org/doc/html/rfc6068[The 'mailto' URI Scheme (rfc6068),window=blank_] + +// Row 4, Column 1 +|**payto** +// Row 4, Column 2 +|https://datatracker.ietf.org/doc/html/rfc8905[The 'payto' URI Scheme for Payments (rfc8905),window=blank_] +// Row 4, Column 4 + +// Row 5, Column 1 +|**telnet** +// Row 5, Column 2 +|https://datatracker.ietf.org/doc/html/rfc4248[The telnet URI Scheme (rfc4248),window=blank_] + +// Row 6, Column 1 +|**urn** +// Row 6, Column 2 +|https://datatracker.ietf.org/doc/html/rfc2141[URN Syntax,window=blank_] + +|=== + Private schemes are possible, defined by 
organizations to enumerate internal resources such as documents or physical devices, or to facilitate the operation @@ -130,19 +161,28 @@ that private does not imply secret; some private schemes such as Amazon's "s3" have publicly available specifications and are quite popular. Here are some examples: -// [table Private Schemes [ -// [Scheme] -// [Specification] -// ][ -// [[*app]] -// [[@https://www.w3.org/TR/app-uri/ app: URL Scheme]] -// ][ -// [[*odbc]] -// [[@https://datatracker.ietf.org/doc/html/draft-patrick-lambert-odbc-uri-scheme ODBC URI Scheme]] -// ][ -// [[*slack]] -// [[@https://api.slack.com/reference/deep-linking Reference: Deep linking into Slack]] -// ]] +[cols="a,a"] +|=== +// Headers +|Scheme|Specification + +// Row 1, Column 1 +|**app** +// Row 1, Column 2 +|https://www.w3.org/TR/app-uri/[app: URL Scheme,window=blank_] + +// Row 2, Column 1 +|**odbc** +// Row 2, Column 2 +|https://datatracker.ietf.org/doc/html/draft-patrick-lambert-odbc-uri-scheme[ODBC URI Scheme,window=blank_] + +// Row 3, Column 1 +|**slack** +// Row 3, Column 2 +|https://api.slack.com/reference/deep-linking[Reference: Deep linking into Slack,window=blank_] + +|=== + In some cases the scheme is implied by the surrounding context and therefore omitted. Here is a complete HTTP/1.1 GET request for the @@ -165,47 +205,66 @@ https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1[HTTP specification,w thusly: -``` -origin-form = absolute-path // [ "?" query ] +[source] +---- +origin-form = absolute-path [ "?" query ] absolute-path = 1*( "/" segment ) -``` +---- + + +[NOTE] +==== +All URLs have a scheme, whether it is explicit or implicit. +The scheme determines what the rest of the URL means. +==== -// [note -// All URLs have a scheme, whether it is explicit or implicit. -// The scheme determines what the rest of the URL means. 
-// ] Here are some more examples of URLs using various schemes (and one example of something that is not a URL): -// [table Scheme Examples [ -// [URL] -// [Notes] -// ][ -// [`https://www.boost.org/index.html`] -// [Hierarchical URL with `https` protocol. Resource in the HTTP protocol.] -// ][ -// [`ftp://host.dom/etc/motd`] -// [Hierarchical URL with `ftp` scheme. Resource in the FTP protocol.] -// ][ -// [`urn:isbn:045145052`] -// [Opaque URL with `urn` scheme. Identifies `isbn` resource.] -// ][ -// [`mailto:person@example.com`] -// [Opaque URL with `mailto` scheme. Identifies e-mail address.] -// ][ -// [`index.html`] -// [URL reference. Missing scheme and authority.] -// ][ -// [`www.boost.org`] -// [A Protocol-Relative Link (PRL). [*Not a URL].] -// ]] +[cols="a,a"] +|=== +// Headers +|URL|Notes + +// Row 1, Column 1 +|`pass:[https://www.boost.org/index.html]` +// Row 1, Column 2 +|Hierarchical URL with `https` protocol. Resource in the HTTP protocol. + +// Row 2, Column 1 +|`pass:[ftp://host.dom/etc/motd]` +// Row 2, Column 2 +|Hierarchical URL with `ftp` scheme. Resource in the FTP protocol. + +// Row 3, Column 1 +|`urn:isbn:045145052` +// Row 3, Column 2 +|Opaque URL with `urn` scheme. Identifies `isbn` resource. + +// Row 4, Column 1 +|`mailto:person@example.com` +// Row 4, Column 2 +|Opaque URL with `mailto` scheme. Identifies e-mail address. + +// Row 5, Column 1 +|`index.html` +// Row 5, Column 2 +|URL reference. Missing scheme and authority. + +// Row 6, Column 1 +|`www.boost.org` +// Row 6, Column 2 +|A Protocol-Relative Link (PRL). **Not a URL**. + +|=== -=== Authority + +== Authority The authority determines how a resource can be accessed. It contains two parts: the @@ -218,9 +277,11 @@ which identify a communication endpoint having dominion over the resource described in the remainder of the URL.
This is the ABNF specification for the authority part: -``` -authority = // [ user [ ":" password ] "@" ] host // [ ":" port ] -``` +[source] +---- +authority = [ user [ ":" password ] "@" ] host [ ":" port ] +---- + The combination of user and optional password is called the __userinfo__. @@ -235,7 +296,7 @@ https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3[__port__,window=blan which identify a communication endpoint having dominion over the resource described in the remainder of the URL. -// [$url/images/AuthorityDiagram.svg] +image:AuthorityDiagram.svg[] Some observations: @@ -247,28 +308,42 @@ Some observations: The host subcomponent represents where resources are located. -// [note -// Note that if an authority is present, the host is always -// defined even if it is the empty string (corresponding -// to a zero-length ['reg-name] in the BNF). -// -// [snippet_parsing_authority_10a] -// ] +[NOTE] +==== +Note that if an authority is present, the host is always +defined even if it is the empty string (corresponding +to a zero-length __reg-name__ in the BNF). + +// snippet_parsing_authority_10a +[source,cpp] +---- +url_view u( "https:///path/to_resource" ); +assert( u.has_authority() ); +assert( u.authority().buffer().empty() ); +assert( u.path() == "/path/to_resource" ); +---- + +==== + The authority component also influences how we should interpret the URL path. If the authority is present, the path component must either be empty or begin with a slash. -// [note Although the specification allows the format `username:password`, -// the password component should be used with care. -// -// It is not recommended to transfer password data through URLs -// unless this is an empty string indicating no password.] +[NOTE] +==== +Although the specification allows the format `username:password`, +the password component should be used with care. + +It is not recommended to transfer password data through URLs +unless this is an empty string indicating no password. 
+==== -=== Containers + +== Containers This library provides the following containers, which are capable of storing any possible URL: @@ -299,14 +374,14 @@ use with URLs. // StringToken // Percent Encoding -// [include 3.1.parsing.qbk] -// [include 3.2.containers.qbk] -// [include 3.3.segments.qbk] -// [include 3.4.params.qbk] -// [include 3.5.normalization.qbk] -// [include 3.6.stringtoken.qbk] -// [include 3.7.percent-encoding.qbk] -// [include 3.8.formatting.qbk] + + + + + + + + diff --git a/doc/modules/ROOT/pages/urls/normalization.adoc b/doc/modules/ROOT/pages/urls/normalization.adoc index d14bc770..2f217eb8 100644 --- a/doc/modules/ROOT/pages/urls/normalization.adoc +++ b/doc/modules/ROOT/pages/urls/normalization.adoc @@ -8,7 +8,7 @@ // -== Normalization += Normalization Normalization allows us to determine if two URLs refer to the same resource. URLs comparisons serve the same purpose, where two strings @@ -40,20 +40,26 @@ of producing false negatives: Simple String Comparison can be performed by accessing the underlying buffer of URLs: +// snippet_normalizing_1 [source,cpp] ---- -// snippet_normalizing_1 +url_view u1("https://www.boost.org/index.html"); +url_view u2("https://www.boost.org/doc/../index.html"); +assert(u1.buffer() != u2.buffer()); ---- -By only considering the rules of `rfc3986`, Simple String +By only considering the rules of https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_], Simple String Comparison fails to identify the URLs above point to the same resource. The comparison operators implement Syntax-Based -Normalization, which implements the rules defined by `rfc3986`. +Normalization, which implements the rules defined by https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_]. +// snippet_normalizing_2 [source,cpp] ---- -// snippet_normalizing_2 +url_view u1("https://www.boost.org/index.html"); +url_view u2("https://www.boost.org/doc/../index.html"); +assert(u1 == u2); ---- @@ -66,13 +72,18 @@ same underlying representation. 
In other words, Simple String Comparison of two normalized URLs is equivalent to Syntax-Based Normalization. +// snippet_normalizing_3 [source,cpp] ---- -// snippet_normalizing_3 +url_view u1("https://www.boost.org/index.html"); +url u2("https://www.boost.org/doc/../index.html"); +assert(u1 == u2); +u2.normalize(); +assert(u1.buffer() == u2.buffer()); ---- -Normalization uses the following definitions of `rfc3986` +Normalization uses the following definitions of https://tools.ietf.org/html/rfc3986[rfc3986,window=blank_] to minimize false negatives: * Case Normalization: percent-encoding triplets are normalized to use uppercase letters @@ -81,9 +92,12 @@ to minimize false negatives: The following example normalizes the percent-encoding and path segments of a URL: +// snippet_normalizing_4 [source,cpp] ---- -// snippet_normalizing_4 +url u("https://www.boost.org/doc/../%69%6e%64%65%78%20file.html"); +u.normalize(); +assert(u.buffer() == "https://www.boost.org/index%20file.html"); ---- @@ -92,9 +106,34 @@ for Scheme-Based and Protocol-Based Normalization. 
One common scheme-specific rule is ignoring the default port for that scheme and empty absolute paths: +// snippet_normalizing_5 [source,cpp] ---- -// snippet_normalizing_5 +auto normalize_http_url = + [](url& u) +{ + u.normalize(); + if (u.port() == "80" || + u.port().empty()) + u.remove_port(); + if (u.has_authority() && + u.encoded_path().empty()) + u.set_path_absolute(true); +}; + +url u1("https://www.boost.org"); +normalize_http_url(u1); +url u2("https://www.boost.org/"); +normalize_http_url(u2); +url u3("https://www.boost.org:/"); +normalize_http_url(u3); +url u4("https://www.boost.org:80/"); +normalize_http_url(u4); + +assert(u1.buffer() == "https://www.boost.org/"); +assert(u2.buffer() == "https://www.boost.org/"); +assert(u3.buffer() == "https://www.boost.org/"); +assert(u4.buffer() == "https://www.boost.org/"); ---- diff --git a/doc/modules/ROOT/pages/urls/params.adoc b/doc/modules/ROOT/pages/urls/params.adoc index 6918a84b..3de79bb3 100644 --- a/doc/modules/ROOT/pages/urls/params.adoc +++ b/doc/modules/ROOT/pages/urls/params.adoc @@ -10,12 +10,12 @@ -== Params += Params While the query is specified as a plain string, it is usually interpreted as a set of key-value pairs commonly referred to as -https://en.wikipedia.org/wiki/Query_string[__URL__,window=blank_], -although here we use the term __query__ or __params__ +https://en.wikipedia.org/wiki/Query_string[__URL Parameters__,window=blank_], +although here we use the term __query parameters__ or __params__ for short. 
There is no official, standard specification of the query parameters format, but the W3C recommendations and HTML 5 have this to say: @@ -45,35 +45,41 @@ Like the path, the library permits access to the params as using these separate, bidirectional view types which reference the underlying URL: -// [table Params Types [ -// [Type] -// [Accessor] -// [Description] -// ][ -// [__params_view__] -// [[link url.ref.boost__urls__url_view_base.params `params`]] -// [ -// A read-only range of decoded params. -// ] -// ][ -// [__params_ref__] -// [[link url.ref.boost__urls__url_base.params `params`]] -// [ -// A modifiable range of decoded params. -// ] -// ][ -// [__params_encoded_view__] -// [[link url.ref.boost__urls__url_view_base.encoded_params `encoded_params`]] -// [ -// A read-only range of params. -// ] -// ][ -// [__params_encoded_ref__] -// [[link url.ref.boost__urls__url_base.encoded_params `encoded_params`]] -// [ -// A modifiable range of params. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Type|Accessor|Description + +// Row 1, Column 1 +|`params_view` +// Row 1, Column 2 +|`params` +// Row 1, Column 3 +|A read-only range of decoded params. + +// Row 2, Column 1 +|`params_ref` +// Row 2, Column 2 +|`params` +// Row 2, Column 3 +|A modifiable range of decoded params. + +// Row 3, Column 1 +|`params_encoded_view` +// Row 3, Column 2 +|`encoded_params` +// Row 3, Column 3 +|A read-only range of params. + +// Row 4, Column 1 +|`params_encoded_ref` +// Row 4, Column 2 +|`encoded_params` +// Row 4, Column 3 +|A modifiable range of params. + +|=== + A param always has a key, even if it is the empty string. The value is optional; an empty string is distinct from @@ -81,34 +87,39 @@ no value. 
To represent individual params the library uses these types, distinguished by their ownership model and whether or not percent-escapes are possible: -// [table Param Types [ -// [Type] -// [String Type] -// [Description] -// ][ -// [__param__] -// [__std_string__] -// [ -// A key-value pair with ownership of the strings. -// This can be used to hold decoded strings, or to -// allow the caller to take ownership of a param -// by making a copy. -// ] -// ][ -// [__param_view__] -// [__string_view__] -// [ -// A key-value pair without percent-escapes, -// referencing externally managed character buffers. -// ] -// ][ -// [__param_pct_view__] -// [__pct_string_view__] -// [ -// A key-value pair which may contain percent-escapes, -// referencing externally managed character buffers. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Type|String Type|Description + +// Row 1, Column 1 +|`param` +// Row 1, Column 2 +|https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] +// Row 1, Column 3 +|A key-value pair with ownership of the strings. + This can be used to hold decoded strings, or to + allow the caller to take ownership of a param + by making a copy. + +// Row 2, Column 1 +|`param_view` +// Row 2, Column 2 +|`string_view` +// Row 2, Column 3 +|A key-value pair without percent-escapes, + referencing externally managed character buffers. + +// Row 3, Column 1 +|`param_pct_view` +// Row 3, Column 2 +|`pct_string_view` +// Row 3, Column 3 +|A key-value pair which may contain percent-escapes, + referencing externally managed character buffers. + +|=== + Param types can be constructed from initializer lists, allowing for convenient notation. To represent a missing @@ -118,32 +129,49 @@ or `nullptr` may be used. 
This table shows some examples of initializer lists used to construct a param type, and the resulting data members: -// [table Param Initializers [ -// [Statement] -// [`qp.key`] -// [`qp.value`] -// [`qp.has_value`] -// ][ -// [`param qp = { "first", "John" };`] -// [`"First"`] -// [`"John"`] -// [`true`] -// ][ -// [`param qp = { "first", "" };`] -// [`"First"`] -// [`""`] -// [`true`] -// ][ -// [`param qp = { "first", no_value };`] -// [`"First"`] -// [`""`] -// [`false`] -// ][ -// [`param qp = { "", "Doe" };`] -// [`""`] -// [`"Doe"`] -// [`true`] -// ]] +[cols="a,a,a,a"] +|=== +// Headers +|Statement|`qp.key`|`qp.value`|`qp.has_value` + +// Row 1, Column 1 +|`param qp = { "first", "John" };` +// Row 1, Column 2 +|`"first"` +// Row 1, Column 3 +|`"John"` +// Row 1, Column 4 +|`true` + +// Row 2, Column 1 +|`param qp = { "first", "" };` +// Row 2, Column 2 +|`"first"` +// Row 2, Column 3 +|`""` +// Row 2, Column 4 +|`true` + +// Row 3, Column 1 +|`param qp = { "first", no_value };` +// Row 3, Column 2 +|`"first"` +// Row 3, Column 3 +|`""` +// Row 3, Column 4 +|`false` + +// Row 4, Column 1 +|`param qp = { "", "Doe" };` +// Row 4, Column 2 +|`""` +// Row 4, Column 3 +|`"Doe"` +// Row 4, Column 4 +|`true` + +|=== + To understand the relationship between the query and the resulting range of params, first we define this @@ -151,9 +179,17 @@ function `parms` which returns a list of params corresponding to the elements in a container of params: +// code_container_5_1 [source,cpp] ---- -// code_container_5_1 +auto parms( core::string_view s ) -> std::list< param > +{ + url_view u( s ); + std::list< param > seq; + for( auto qp : u.params() ) + seq.push_back( qp ); + return seq; +} ---- @@ -161,28 +197,43 @@ In the table below we show the result of invoking `parms` with different queries.
This demonstrates how the syntax of the query maps to the parameter structure: -// [table Params Sequences [ -// [s] -// [`parms( s )`] -// ][ -// [`"?first=John&last=Doe"`] -// [`{ { "first", "John" }, { "last", "Doe" } }`] -// ][ -// [`"?id=42&unsorted"`] -// [`{ { "id", "42" }, { "last", no_value } }`] -// ][ -// [`"?col=cust&row="`] -// [`{ { "col", "cust" }, { "row", "" } }`] -// ][ -// [`"?justify=left&"`] -// [`{ { "justify", "left" }, { "", no_value } }`] -// ][ -// [`"?"`] -// [`{ { "", no_value } }`] -// ][ -// [`""`] -// [`{ }`] -// ]] +[cols="a,a"] +|=== +// Headers +|s|`parms( s )` + +// Row 1, Column 1 +|`"?first=John&last=Doe"` +// Row 1, Column 2 +|`{ { "first", "John" }, { "last", "Doe" } }` + +// Row 2, Column 1 +|`"?id=42&unsorted"` +// Row 2, Column 2 +|`{ { "id", "42" }, { "unsorted", no_value } }` + +// Row 3, Column 1 +|`"?col=cust&row="` +// Row 3, Column 2 +|`{ { "col", "cust" }, { "row", "" } }` + +// Row 4, Column 1 +|`"?justify=left&"` +// Row 4, Column 2 +|`{ { "justify", "left" }, { "", no_value } }` + +// Row 5, Column 1 +|`"?"` +// Row 5, Column 2 +|`{ { "", no_value } }` + +// Row 6, Column 1 +|`""` +// Row 6, Column 2 +|`{ }` + +|=== + It may be surprising that an empty query string ("?") produces a sequence with one empty param.
This is by diff --git a/doc/modules/ROOT/pages/urls/parsing.adoc b/doc/modules/ROOT/pages/urls/parsing.adoc index 3516d343..bff9c8b8 100644 --- a/doc/modules/ROOT/pages/urls/parsing.adoc +++ b/doc/modules/ROOT/pages/urls/parsing.adoc @@ -8,7 +8,7 @@ // -== Parsing += Parsing Algorithms which parse URLs return a view which references the underlying character buffer without taking ownership, avoiding @@ -17,9 +17,10 @@ string literal containing a https://datatracker.ietf.org/doc/html/rfc3986#section-3[__URI__,window=blank_]: +// code_urls_parsing_1 [source,cpp] ---- -// code_urls_parsing_1 +boost::core::string_view s = "https://user:pass@example.com:443/path/to/my%2dfile.txt?id=42&name=John%20Doe+Jingleheimer%2DSchmidt#page%20anchor"; ---- @@ -42,9 +43,10 @@ https://datatracker.ietf.org/doc/html/rfc3986#section-4.1[__URI-reference__,wind grammar, throwing an exception upon failure. The following two statements are equivalent: +// code_urls_parsing_2 [source,cpp] ---- -// code_urls_parsing_2 +boost::system::result<url_view> r = parse_uri( s ); ---- @@ -54,49 +56,73 @@ There are several varieties of URLs, and depending on the use-case a particular grammar may be needed. In the target of an HTTP GET request for example, the scheme and fragment are omitted. This corresponds to the https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1[__origin-form__,window=blank_] -production rule described in `rfc7230`. The function +production rule described in https://tools.ietf.org/html/rfc7230[rfc7230,window=blank_]. The function `parse_origin_form` is suited for this purpose.
All the URL parsing functions are listed here: -// [table Parsing Functions [ -// [Function] -// [Grammar] -// [Example] -// [Notes] -// ][ -// [[link url.ref.boost__urls__parse_absolute_uri `parse_absolute_uri`]] -// [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.3 ['absolute-URI]]] -// [[teletype]`http://www.boost.org/index.html?field=value`] -// [No fragment] -// ][ -// [[link url.ref.boost__urls__parse_origin_form `parse_origin_form`]] -// [[@https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 ['origin-form]]] -// [[teletype]`/index.html?field=value`] -// [Used in HTTP] -// ][ -// [[link url.ref.boost__urls__parse_relative_ref `parse_relative_ref`]] -// [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 ['relative-ref]]] -// [[teletype]`//www.boost.org/index.html?field=value#downloads`] -// [] -// ][ -// [[link url.ref.boost__urls__parse_uri `parse_uri`]] -// [[@https://datatracker.ietf.org/doc/html/rfc3986#section-3 ['URI]]] -// [[teletype]`http://www.boost.org/index.html?field=value#downloads`] -// [] -// ][ -// [[link url.ref.boost__urls__parse_uri_reference `parse_uri_reference`]] -// [[@https://datatracker.ietf.org/doc/html/rfc3986#section-4.1 ['URI-reference]]] -// [[teletype]`http://www.boost.org/index.html`] -// [Any ['URI] or ['relative-ref]] -// ]] +[cols="a,a,a,a"] +|=== +// Headers +|Function|Grammar|Example|Notes + +// Row 1, Column 1 +|`parse_absolute_uri` +// Row 1, Column 2 +|https://datatracker.ietf.org/doc/html/rfc3986#section-4.3[__absolute-URI__,window=blank_] +// Row 1, Column 3 +|`pass:[http://www.boost.org/index.html?field=value]` +// Row 1, Column 4 +|No fragment + +// Row 2, Column 1 +|`parse_origin_form` +// Row 2, Column 2 +|https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1[__origin-form__,window=blank_] +// Row 2, Column 3 +|`pass:[/index.html?field=value]` +// Row 2, Column 4 +|Used in HTTP + +// Row 3, Column 1 +|`parse_relative_ref` +// Row 3, Column 2 
+|https://datatracker.ietf.org/doc/html/rfc3986#section-4.2[__relative-ref__,window=blank_] +// Row 3, Column 3 +|`pass:[//www.boost.org/index.html?field=value#downloads]` +// Row 3, Column 4 +| + +// Row 4, Column 1 +|`parse_uri` +// Row 4, Column 2 +|https://datatracker.ietf.org/doc/html/rfc3986#section-3[__URI__,window=blank_] +// Row 4, Column 3 +|`pass:[http://www.boost.org/index.html?field=value#downloads]` +// Row 4, Column 4 +| + +// Row 5, Column 1 +|`parse_uri_reference` +// Row 5, Column 2 +|https://datatracker.ietf.org/doc/html/rfc3986#section-4.1[__URI-reference__,window=blank_] +// Row 5, Column 3 +|`pass:[http://www.boost.org/index.html]` +// Row 5, Column 4 +|Any __URI__ or __relative-ref__ + +|=== + The URL is stored in its serialized form. Therefore, it can always be easily output, sent, or embedded as part of a protocol: +// snippet_parsing_url_1bb [source,cpp] ---- -// snippet_parsing_url_1bb +url u = parse_uri_reference( "https://www.example.com/path/to/file.txt" ).value(); + +assert(u.encoded_path() == "/path/to/file.txt"); ---- @@ -104,9 +130,12 @@ A `url` is an allocating container which owns its character buffer. Upon construction from `url_view`, it allocates dynamic storage to hold a copy of the string. +// snippet_parsing_url_1bc [source,cpp] ---- -// snippet_parsing_url_1bc +boost::system::result< url > rv = parse_uri_reference( "https://www.example.com/path/to/file.txt" ); + +static_assert( std::is_convertible< boost::system::result< url_view >, boost::system::result< url > >::value, "" ); ---- @@ -114,13 +143,16 @@ A `static_url` is a container which owns its character buffer for a URL whose maximum size is known. Upon construction from `url_view`, it does not perform any dynamic memory allocations. 
+// snippet_parsing_url_1bd [source,cpp] ---- -// snippet_parsing_url_1bd +boost::system::result< static_url<1024> > rv = parse_uri_reference( "https://www.example.com/path/to/file.txt" ); + +static_assert( std::is_convertible< boost::system::result< static_url<1024> >, boost::system::result< url > >::value, "" ); ---- -=== Result Type +== Result Type These functions have a return type which uses the `result` alias template. This class allows the parsing algorithms to report @@ -129,9 +161,20 @@ errors without referring to exceptions. The functions `result::operator bool()` and `result::operator*` can be used to check if the result contains an error. +// snippet_parsing_url_1 [source,cpp] ---- -// snippet_parsing_url_1 +boost::system::result< url > ru = parse_uri_reference( "https://www.example.com/path/to/file.txt" ); +if ( ru ) +{ + url u = *ru; + assert(u.encoded_path() == "/path/to/file.txt"); +} +else +{ + boost::system::error_code e = ru.error(); + handle_error(e); +} ---- @@ -140,9 +183,18 @@ error, `result::operator*` provides an unchecked alternative to get a value from `result`. In contexts where it is acceptable to throw errors, `result::value` can be used directly. +// snippet_parsing_url_1b [source,cpp] ---- -// snippet_parsing_url_1b +try +{ + url u = parse_uri_reference( "https://www.example.com/path/to/file.txt" ).value(); + assert(u.encoded_path() == "/path/to/file.txt"); +} +catch (boost::system::system_error &e) +{ + handle_error(e); +} ---- diff --git a/doc/modules/ROOT/pages/urls/percent-encoding.adoc b/doc/modules/ROOT/pages/urls/percent-encoding.adoc index eb38e817..6967d532 100644 --- a/doc/modules/ROOT/pages/urls/percent-encoding.adoc +++ b/doc/modules/ROOT/pages/urls/percent-encoding.adoc @@ -8,61 +8,83 @@ // -== Percent Encoding += Percent Encoding -=== Encoding +== Encoding The `encode` can be used to percent-encode strings -with the specified `CharSet`. +with the specified __CharSet__. 
+// snippet_encoding_1 [source,cpp] ---- -// snippet_encoding_1 +std::string s = encode("hello world!", unreserved_chars); +assert(s == "hello%20world%21"); ---- A few parameters, such as encoding spaces as plus (`+`), can be adjusted with `encoding_opts`: +// snippet_encoding_2 [source,cpp] ---- -// snippet_encoding_2 +encoding_opts opt; +opt.space_as_plus = true; +std::string s = encode("msg=hello world", pchars, opt); +assert(s == "msg=hello+world"); ---- -The result type of the function can also be specified via a `StringToken` +The result type of the function can also be specified via a __StringToken__ so that strings can be reused or appended. +// snippet_encoding_3 [source,cpp] ---- -// snippet_encoding_3 +std::string s; +encode("hello ", pchars, {}, string_token::assign_to(s)); +encode("world", pchars, {}, string_token::append_to(s)); +assert(s == "hello%20world"); ---- We can also use `encoded_size` to determine the required size before attempting to encode: +// snippet_encoding_4 [source,cpp] ---- -// snippet_encoding_4 +boost::core::string_view e = "hello world"; +std::string s; +s.reserve(encoded_size(e, pchars)); +encode(e, pchars, {}, string_token::assign_to(s)); +assert(s == "hello%20world"); ---- In other scenarios, strings can also be directly encoded into buffers: +// snippet_encoding_5 [source,cpp] ---- -// snippet_encoding_5 +boost::core::string_view e = "hello world"; +std::string s; +s.resize(encoded_size(e, pchars)); +encode(&s[0], s.size(), e, pchars); +assert(s == "hello%20world"); ---- -=== Validating +== Validating The class `pct_string_view` represents a reference to a percent-encoded string: +// snippet_encoding_6 [source,cpp] ---- -// snippet_encoding_6 +pct_string_view sv = "hello%20world"; +assert(sv == "hello%20world"); ---- @@ -75,9 +97,14 @@ To simply validate a string without resorting to exceptions, a `result` can be returned with the `make_pct_string_view`: +// snippet_encoding_7 [source,cpp] ---- -// snippet_encoding_7
+boost::system::result<pct_string_view> rs = + make_pct_string_view("hello%20world"); +assert(rs.has_value()); +pct_string_view sv = rs.value(); +assert(sv == "hello%20world"); ---- @@ -88,9 +115,13 @@ The modifying functions in classes such as `url` expect instances of removes the responsibility of revalidating this information or throwing exceptions from these functions: +// snippet_encoding_8 [source,cpp] ---- -// snippet_encoding_8 +pct_string_view s = "path/to/file"; +url u; +u.set_encoded_path(s); +assert(u.buffer() == "path/to/file"); ---- @@ -98,9 +129,12 @@ When exceptions are acceptable, a common pattern is to let a literal string or other type convertible to `string_view` be implicitly converted to `pct_string_view`. +// snippet_encoding_9 [source,cpp] ---- -// snippet_encoding_9 +url u; +u.set_encoded_path("path/to/file"); +assert(u.buffer() == "path/to/file"); ---- @@ -112,9 +146,13 @@ Reusing the validation guarantee is particularly useful when the `pct_string_view` comes from another source where the data is also ensured to be validated: +// snippet_encoding_10 [source,cpp] ---- -// snippet_encoding_10 +url_view uv("path/to/file"); +url u; +u.set_encoded_path(uv.encoded_path()); +assert(u.buffer() == "path/to/file"); ---- @@ -124,15 +162,20 @@ does not need to revalidate any information from `encoded_path` because these references are passed as `pct_string_view`. -=== Decode +== Decode The class `pct_string_view` represents a reference to a percent-encoded string. `decode_view` is analogous to `pct_string_view`, with the main difference that the underlying buffer always dereferences to decoded characters. +// snippet_encoding_11 [source,cpp] ---- -// snippet_encoding_11 +pct_string_view es("hello%20world"); +assert(es == "hello%20world"); + +decode_view dv("hello%20world"); +assert(dv == "hello world"); ---- @@ -140,9 +183,15 @@ A `decode_view` can also be created from a `pct_string_view` with the `operator*`.
This also gives us an opportunity to validate external strings: +// snippet_encoding_12 [source,cpp] ---- -// snippet_encoding_12 +boost::system::result<pct_string_view> rs = + make_pct_string_view("hello%20world"); +assert(rs.has_value()); +pct_string_view s = rs.value(); +decode_view dv = *s; +assert(dv == "hello world"); ---- @@ -150,9 +199,37 @@ This is particularly useful when the decoded string needs to be accessed for comparisons without explicitly decoding the string into a buffer: +// snippet_encoding_13 [source,cpp] ---- -// snippet_encoding_13 +url_view u = + parse_relative_ref("user/john%20doe/profile%20photo.jpg").value(); +std::vector<std::string> route = + {"user", "john doe", "profile photo.jpg"}; +auto segs = u.encoded_segments(); +auto it0 = segs.begin(); +auto end0 = segs.end(); +auto it1 = route.begin(); +auto end1 = route.end(); +while ( + it0 != end0 && + it1 != end1) +{ + pct_string_view seg0 = *it0; + decode_view dseg0 = *seg0; + boost::core::string_view seg1 = *it1; + if (dseg0 == seg1) + { + ++it0; + ++it1; + } + else + { + break; + } +} +bool route_match = it0 == end0 && it1 == end1; +assert(route_match); ---- @@ -162,9 +239,14 @@ can be used to decode the data into a buffer. Like the free-function `encode`, decoding options and the string token can be customized. +// snippet_encoding_14 [source,cpp] ---- -// snippet_encoding_14 +pct_string_view s = "user/john%20doe/profile%20photo.jpg"; +std::string buf; +buf.resize(s.decoded_size()); +s.decode({}, string_token::assign_to(buf)); +assert(buf == "user/john doe/profile photo.jpg"); ---- diff --git a/doc/modules/ROOT/pages/urls/segments.adoc b/doc/modules/ROOT/pages/urls/segments.adoc index df1b47eb..9e1ec433 100644 --- a/doc/modules/ROOT/pages/urls/segments.adoc +++ b/doc/modules/ROOT/pages/urls/segments.adoc @@ -10,42 +10,48 @@ -== Segments += Segments Hierarchical schemes often interpret the path as a slash-delimited sequence of percent-encoded strings called segments.
In this library the segments may be accessed using these separate, bidirectional view types which reference the underlying URL: -// [table Segments Types [ -// [Type] -// [Accessor] -// [Description] -// ][ -// [__segments_view__] -// [[link url.ref.boost__urls__url_view_base.segments `segments`]] -// [ -// A read-only range of decoded segments. -// ] -// ][ -// [__segments_ref__] -// [[link url.ref.boost__urls__url_base.segments `segments`]] -// [ -// A modifiable range of decoded segments. -// ] -// ][ -// [__segments_encoded_view__] -// [[link url.ref.boost__urls__url_view_base.encoded_segments `encoded_segments`]] -// [ -// A read-only range of segments. -// ] -// ][ -// [__segments_encoded_ref__] -// [[link url.ref.boost__urls__url_base.encoded_segments `encoded_segments`]] -// [ -// A modifiable range of segments. -// ] -// ]] +[cols="a,a,a"] +|=== +// Headers +|Type|Accessor|Description + +// Row 1, Column 1 +|`segments_view` +// Row 1, Column 2 +|`segments` +// Row 1, Column 3 +|A read-only range of decoded segments. + +// Row 2, Column 1 +|`segments_ref` +// Row 2, Column 2 +|`segments` +// Row 2, Column 3 +|A modifiable range of decoded segments. + +// Row 3, Column 1 +|`segments_encoded_view` +// Row 3, Column 2 +|`encoded_segments` +// Row 3, Column 3 +|A read-only range of segments. + +// Row 4, Column 1 +|`segments_encoded_ref` +// Row 4, Column 2 +|`encoded_segments` +// Row 4, Column 3 +|A modifiable range of segments. 
+ +|=== + First we observe these invariants about paths and segments: @@ -73,9 +79,17 @@ we define this function `segs` which returns a list of strings corresponding to the elements in a container of segments: +// code_container_4_1 [source,cpp] ---- -// code_container_4_1 +auto segs( core::string_view s ) -> std::list< std::string > +{ + url_view u( s ); + std::list< std::string > seq; + for( auto seg : u.encoded_segments() ) + seq.push_back( seg.decode() ); + return seq; +} ---- @@ -83,59 +97,97 @@ In this table we show the result of invoking `segs` with different paths. This demonstrates how the library achieves the invariants described above for various interesting cases: -// [table Segments [ -// [s] -// [`segs( s )`] -// [absolute] -// ][ -// [`""`] -// [`{ }`] -// [] -// ][ -// [`"/"`] -// [`{ }`] -// [yes] -// ][ -// [`"./"`] -// [`{ "" }`] -// [] -// ][ -// [`"usr"`] -// [`{ "usr" }`] -// [] -// ][ -// [`"./usr"`] -// [`{ "usr" }`] -// [] -// ][ -// [`"/index.htm"`] -// [`{ "index.htm" }`] -// [yes] -// ][ -// [`"/images/cat-pic.gif"`] -// [`{ "images", "cat-pic.gif" }`] -// [yes] -// ][ -// [`"images/cat-pic.gif"`] -// [`{ "images", "cat-pic.gif" }`] -// [] -// ][ -// [`"/fast//query"`] -// [`{ "fast", "", "query" }`] -// [yes] -// ][ -// [`"fast//"`] -// [`{ "fast", "", "" }`] -// [] -// ][ -// [`"/./"`] -// [`{ "" }`] -// [yes] -// ][ -// [`".//"`] -// [`{ "", "" }`] -// [] -// ]] +[cols="a,a,a"] +|=== +// Headers +|s|`segs( s )`|absolute + +// Row 1, Column 1 +|`""` +// Row 1, Column 2 +|`{ }` +// Row 1, Column 3 +| + +// Row 2, Column 1 +|`"/"` +// Row 2, Column 2 +|`{ }` +// Row 2, Column 3 +|yes + +// Row 3, Column 1 +|`"./"` +// Row 3, Column 2 +|`{ "" }` +// Row 3, Column 3 +| + +// Row 4, Column 1 +|`"usr"` +// Row 4, Column 2 +|`{ "usr" }` +// Row 4, Column 3 +| + +// Row 5, Column 1 +|`"./usr"` +// Row 5, Column 2 +|`{ "usr" }` +// Row 5, Column 3 +| + +// Row 6, Column 1 +|`"/index.htm"` +// Row 6, Column 2 +|`{ "index.htm" }` +// Row 6, 
Column 3 +|yes + +// Row 7, Column 1 +|`"/images/cat-pic.gif"` +// Row 7, Column 2 +|`{ "images", "cat-pic.gif" }` +// Row 7, Column 3 +|yes + +// Row 8, Column 1 +|`"images/cat-pic.gif"` +// Row 8, Column 2 +|`{ "images", "cat-pic.gif" }` +// Row 8, Column 3 +| + +// Row 9, Column 1 +|`"/fast//query"` +// Row 9, Column 2 +|`{ "fast", "", "query" }` +// Row 9, Column 3 +|yes + +// Row 10, Column 1 +|`"fast//"` +// Row 10, Column 2 +|`{ "fast", "", "" }` +// Row 10, Column 3 +| + +// Row 11, Column 1 +|`"/./"` +// Row 11, Column 2 +|`{ "" }` +// Row 11, Column 3 +|yes + +// Row 12, Column 1 +|`".//"` +// Row 12, Column 2 +|`{ "", "" }` +// Row 12, Column 3 +| + +|=== + This implies that two paths may map to the same sequence of segments . In the paths `"usr"` and `"./usr"`, the `"./"` @@ -212,59 +264,97 @@ various modifications to a URL containing a path: // set_encoded_path() // edit_segments() -// [table Path Operations [ -// [URL] -// [Operation] -// [Result] -// ][ -// [`"info:kyle:xy"`] -// [`remove_scheme()`] -// [`"kyle%3Axy"`] -// ][ -// [`"kyle%3Axy"`] -// [`set_scheme( "gopher" )`] -// [`"gopher:kyle:xy"`] -// ][ -// [`"http://www.example.com//kyle:xy"`] -// [`remove_authority()`] -// [`"http:/.//kyle:xy"`] -// ][ -// [`"//www.example.com//kyle:xy"`] -// [`remove_authority()`] -// [`"/.//kyle:xy"`] -// ][ -// [`"http://www.example.com//kyle:xy"`] -// [`remove_origin()`] -// [`"/.//kyle:xy"`] -// ][ -// [`"info:kyle:xy"`] -// [`remove_origin()`] -// [`"kyle%3Axy"`] -// ][ -// [`"/kyle:xy"`] -// [`set_path_absolute( false )`] -// [`"kyle%3Axy"`] -// ][ -// [`"kyle%3Axy"`] -// [`set_path_absolute( true )`] -// [`"/kyle:xy"`] -// ][ -// [`""`] -// [`set_path( "kyle:xy" )`] -// [`"kyle%3Axy"`] -// ][ -// [`""`] -// [`set_path( "//foo/fighters.txt" )`] -// [`"/.//foo/fighters.txt"`] -// ][ -// [`"my%3Asharona/billa%3Abong"`] -// [`normalize()`] -// [`"my%3Asharona/billa:bong"`] -// ][ -// [`"./my:sharona"`] -// [`normalize()`] -// [`"my%3Asharona"`] -// ]] 
+[cols="a,a,a"] +|=== +// Headers +|URL|Operation|Result + +// Row 1, Column 1 +|`"info:kyle:xy"` +// Row 1, Column 2 +|`remove_scheme()` +// Row 1, Column 3 +|`"kyle%3Axy"` + +// Row 2, Column 1 +|`"kyle%3Axy"` +// Row 2, Column 2 +|`set_scheme( "gopher" )` +// Row 2, Column 3 +|`"gopher:kyle:xy"` + +// Row 3, Column 1 +|`"http://www.example.com//kyle:xy"` +// Row 3, Column 2 +|`remove_authority()` +// Row 3, Column 3 +|`"http:/.//kyle:xy"` + +// Row 4, Column 1 +|`"//www.example.com//kyle:xy"` +// Row 4, Column 2 +|`remove_authority()` +// Row 4, Column 3 +|`"/.//kyle:xy"` + +// Row 5, Column 1 +|`"http://www.example.com//kyle:xy"` +// Row 5, Column 2 +|`remove_origin()` +// Row 5, Column 3 +|`"/.//kyle:xy"` + +// Row 6, Column 1 +|`"info:kyle:xy"` +// Row 6, Column 2 +|`remove_origin()` +// Row 6, Column 3 +|`"kyle%3Axy"` + +// Row 7, Column 1 +|`"/kyle:xy"` +// Row 7, Column 2 +|`set_path_absolute( false )` +// Row 7, Column 3 +|`"kyle%3Axy"` + +// Row 8, Column 1 +|`"kyle%3Axy"` +// Row 8, Column 2 +|`set_path_absolute( true )` +// Row 8, Column 3 +|`"/kyle:xy"` + +// Row 9, Column 1 +|`""` +// Row 9, Column 2 +|`set_path( "kyle:xy" )` +// Row 9, Column 3 +|`"kyle%3Axy"` + +// Row 10, Column 1 +|`""` +// Row 10, Column 2 +|`set_path( "//foo/fighters.txt" )` +// Row 10, Column 3 +|`"/.//foo/fighters.txt"` + +// Row 11, Column 1 +|`"my%3Asharona/billa%3Abong"` +// Row 11, Column 2 +|`normalize()` +// Row 11, Column 3 +|`"my%3Asharona/billa:bong"` + +// Row 12, Column 1 +|`"./my:sharona"` +// Row 12, Column 2 +|`normalize()` +// Row 12, Column 3 +|`"my%3Asharona"` + +|=== + For the full set of containers and functions for operating on paths and segments, please consult the reference. 
diff --git a/doc/modules/ROOT/pages/urls/stringtoken.adoc b/doc/modules/ROOT/pages/urls/stringtoken.adoc index f1036079..e3ad3264 100644 --- a/doc/modules/ROOT/pages/urls/stringtoken.adoc +++ b/doc/modules/ROOT/pages/urls/stringtoken.adoc @@ -8,10 +8,10 @@ // -== String Token += String Token Functions which perform percent-decoding return values using -`std::string` when called without special arguments. This is +https://en.cppreference.com/w/cpp/string/basic_string[`std::string`,window=blank_] when called without special arguments. This is the best default for ergonomics, and a good enough default for performance considering that many decoded strings fit in the small buffer available to most standard implementations. @@ -23,7 +23,7 @@ algorithms acquire and store data in strings, for example: * Appending to existing strings The library provides a special customization mechanism called -`StringToken` to control how algorithms which require an output +__StringToken__ to control how algorithms which require an output buffer acquire their storage. The signature diff --git a/doc/qbk/6.0.examples.qbk b/doc/qbk/6.0.examples.qbk index 99b2cbb7..29a868fa 100644 --- a/doc/qbk/6.0.examples.qbk +++ b/doc/qbk/6.0.examples.qbk @@ -4,112 +4,20 @@ Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - Official repository: https://github.com/cppalliance/json + Official repository: https://github.com/cppalliance/url ] [/-----------------------------------------------------------------------------] [section Examples] -[section QR Code] - -A QR code is a machine-readable two-dimensional barcode. They might contain data -for a identifier or a URL to a website. - -This example shows how to construct and modify URLs to consume a third party API to -generate QR Codes. - -[example_qrcode] -[endsect] - -[section Finicky] - -This example shows how to classify URLs according to a set of rules. 
It is -inspired by [@https://github.com/johnste/finicky Finicky] application. - -The URLs are classified and redirected to a browser according to their -category. See the example `config.json` file. - -[example_finicky] -[endsect] - -[section mailto URLs] - -`mailto` is a URL scheme for email addresses. `mailto` URL are used on websites -to allow users to send an email to a specific address directly from an HTML document. - -This example parses a mailto URL into a new view type and prints its components to -standard output. - -[example_mailto] -[endsect] - -[section Magnet Link] - -`magnet` is a URL scheme for identifying files by their content. These files are -usually identified by cryptographic hash value. - -Magnet links are useful in peer-to-peer file sharing networks because they allow -resources to be referred to without the need for a continuously available host.. - -This example parses a magnet link into a new view type and prints its components to -standard output. - -[example_magnet] -[endsect] - -[section File Router] - -This example defines a router that associates URL paths to a directory in the filesystem. If -the specified route matches and the file exists, the example prints its contents to standard output. - -[example_file_router] -[endsect] - -[section Router] - -This example defines a router for URL paths. If the specified route matches one of the existing -routes, the example executes the underlying callback function. - -[example_router] -[endsect] - -[section Sanitizing URLs] - -This example parses a non-strict or invalid URL -into path components according to its delimiters. -This pattern can be adapted to the requirements of other -applications. - -Once the non-strict components are determined, a new URL is -created and its parts are set with the `set_encoded_X` -functions, which will encode any invalid chars accordingly. 
- -This sort of transformation is useful in applications that are -extremely loose in what kinds of URLs they accept, such as -browsers. The sanitized URL can later be used for machine-to-machine -communication. - -Using non-strict URLs directly is a security concern in -machine-to-machine communication, is ambiguous, and also -involve an extra cost for the transformations. - -Different transformations are required by different applications to -construct a valid URL appropriate for machine-to-machine communication. -For instance, if an invalid relative reference includes something that -looks like a host in the first path segment, browsers usually interpret -that as the host with an implicit "https" scheme. Other applications -also have other implicit schemes. - -The example also identifies whether the input url is already valid. -It includes diagnostics that can be used to help the user determine -if a URL is invalid and why it's invalid. - -Once all transformations are applied, the result is a URL -appropriate for machine-to-machine communication. - -[example_sanitize_url] -[endsect] +[include 6.1.qrcode.qbk] +[include 6.2.finicky.qbk] +[include 6.3.mailto.qbk] +[include 6.4.magnet-link.qbk] +[include 6.5.file-router.qbk] +[include 6.6.router.qbk] +[include 6.7.sanitize.qbk] [endsect] diff --git a/doc/qbk/6.1.qrcode.qbk b/doc/qbk/6.1.qrcode.qbk new file mode 100644 index 00000000..607b8e35 --- /dev/null +++ b/doc/qbk/6.1.qrcode.qbk @@ -0,0 +1,21 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section QR Code] + +A QR code is a machine-readable two-dimensional barcode. It might contain data +for an identifier or a URL to a website. 
+ +This example shows how to construct and modify URLs to consume a third party API to +generate QR Codes. + +[example_qrcode] +[endsect] diff --git a/doc/qbk/6.2.finicky.qbk b/doc/qbk/6.2.finicky.qbk new file mode 100644 index 00000000..c718ea2d --- /dev/null +++ b/doc/qbk/6.2.finicky.qbk @@ -0,0 +1,21 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section Finicky] + +This example shows how to classify URLs according to a set of rules. It is +inspired by the [@https://github.com/johnste/finicky Finicky] application. + +The URLs are classified and redirected to a browser according to their +category. See the example `config.json` file. + +[example_finicky] +[endsect] diff --git a/doc/qbk/6.3.mailto.qbk b/doc/qbk/6.3.mailto.qbk new file mode 100644 index 00000000..ebbdc272 --- /dev/null +++ b/doc/qbk/6.3.mailto.qbk @@ -0,0 +1,21 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section mailto URLs] + +`mailto` is a URL scheme for email addresses. `mailto` URLs are used on websites +to allow users to send an email to a specific address directly from an HTML document. + +This example parses a mailto URL into a new view type and prints its components to +standard output. 
+ +[example_mailto] +[endsect] diff --git a/doc/qbk/6.4.magnet-link.qbk b/doc/qbk/6.4.magnet-link.qbk new file mode 100644 index 00000000..9bf7e080 --- /dev/null +++ b/doc/qbk/6.4.magnet-link.qbk @@ -0,0 +1,24 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section Magnet Link] + +`magnet` is a URL scheme for identifying files by their content. These files are +usually identified by a cryptographic hash value. + +Magnet links are useful in peer-to-peer file sharing networks because they allow +resources to be referred to without the need for a continuously available host. + +This example parses a magnet link into a new view type and prints its components to +standard output. + +[example_magnet] +[endsect] diff --git a/doc/qbk/6.5.file-router.qbk b/doc/qbk/6.5.file-router.qbk new file mode 100644 index 00000000..c1b1f412 --- /dev/null +++ b/doc/qbk/6.5.file-router.qbk @@ -0,0 +1,19 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section File Router] + +This example defines a router that associates URL paths with a directory in the filesystem. If +the specified route matches and the file exists, the example prints its contents to standard output. 
+ +[example_file_router] +[endsect] + diff --git a/doc/qbk/6.6.router.qbk b/doc/qbk/6.6.router.qbk new file mode 100644 index 00000000..ad62148d --- /dev/null +++ b/doc/qbk/6.6.router.qbk @@ -0,0 +1,19 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section Router] + +This example defines a router for URL paths. If the specified route matches one of the existing +routes, the example executes the underlying callback function. + +[example_router] +[endsect] + diff --git a/doc/qbk/6.7.sanitize.qbk b/doc/qbk/6.7.sanitize.qbk new file mode 100644 index 00000000..77e63836 --- /dev/null +++ b/doc/qbk/6.7.sanitize.qbk @@ -0,0 +1,47 @@ +[/ + Copyright (c) 2023 Alan de Freitas (alandefreitas@gmail.com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Official repository: https://github.com/cppalliance/url +] + +[/-----------------------------------------------------------------------------] + +[section Sanitizing URLs] + +This example parses a non-strict or invalid URL +into path components according to its delimiters. +This pattern can be adapted to the requirements of other +applications. + +Once the non-strict components are determined, a new URL is +created and its parts are set with the `set_encoded_X` +functions, which will encode any invalid chars accordingly. + +This sort of transformation is useful in applications that are +extremely loose in what kinds of URLs they accept, such as +browsers. The sanitized URL can later be used for machine-to-machine +communication. 
+ +Using non-strict URLs directly is a security concern in +machine-to-machine communication, is ambiguous, and also +involves an extra cost for the transformations. + +Different transformations are required by different applications to +construct a valid URL appropriate for machine-to-machine communication. +For instance, if an invalid relative reference includes something that +looks like a host in the first path segment, browsers usually interpret +that as the host with an implicit "https" scheme. Other applications +also have other implicit schemes. + +The example also identifies whether the input url is already valid. +It includes diagnostics that can be used to help the user determine +if a URL is invalid and why it's invalid. + +Once all transformations are applied, the result is a URL +appropriate for machine-to-machine communication. + +[example_sanitize_url] +[endsect]