diff --git a/doc/qbk/05_00_io.qbk b/doc/qbk/05_00_io.qbk index 37b222b5..116989ce 100644 --- a/doc/qbk/05_00_io.qbk +++ b/doc/qbk/05_00_io.qbk @@ -10,6 +10,11 @@ [/-----------------------------------------------------------------------------] [section Input/Output] +[block''''''] + +The library provides parsing and serialization algorithms to transform +JSON to and from the __value__ container as needed. This is accomplished +through free functions and classes, described as follows. [include 05_01_parsing.qbk] [include 05_02_serializing.qbk] diff --git a/doc/qbk/05_01_parsing.qbk b/doc/qbk/05_01_parsing.qbk index 2567535c..2abee1cc 100644 --- a/doc/qbk/05_01_parsing.qbk +++ b/doc/qbk/05_01_parsing.qbk @@ -58,8 +58,8 @@ functions and types to assist with parsing: [ A low level building block used for efficiently building a __value__. The parsers use this internally, and users - may use it to adapt foreign parsers to use this library's - containers. + may use it to adapt foreign parsers to produce this + library's containers. ] ]] @@ -105,7 +105,7 @@ with __parse_options__ is possible: [/-----------------------------------------------------------------------------] -[heading Parser Instance] +[heading Parser] Instances of __parser__ and __stream_parser__ offer functionality beyond what is available when using the __parse__ free functions: @@ -122,39 +122,17 @@ an instance of __parser__ or __stream_parser__, this temporary storage can be reused when parsing more than one JSON, reducing the total number of dynamic memory allocations. -To use the __stream_parser__, declare an instance. Then call -[link json.ref.boost__json__stream_parser.write `write`] -zero or more times with successive buffers representing the input JSON. -When there are no more buffers, call -[link json.ref.boost__json__stream_parser.finish `finish`]. -The function -[link json.ref.boost__json__stream_parser.done `done`]. 
-returns `true` after a successful call to `write` or `finish` -if parsing is complete. This example persists the parser instance -in a class member to reuse across calls: +To use the __parser__, declare an instance. Then call +[link json.ref.boost__json__parser.write `write`] +once with the buffer containing the input JSON. +Finally, call +[link json.ref.boost__json__parser.release `release`] +to take ownership of the resulting __value__ upon success. +This example persists the parser instance in a class member +to reuse across calls: [doc_parsing_7] -The parser interface allows a -[@https://en.wikipedia.org/wiki/Online_algorithm ['streaming algorithm]]; -it is possible to parse a JSON incrementally, in pieces. The entire -input JSON does not need to be loaded into memory at once first. This -interface requires more function calls than with the parse free functions. -A network server can use the streaming interface to process incoming -JSON in fixed-size amounts, providing these benefits: - -* CPU consumption per I/O cycle is bounded -* Memory consumption per I/O cycle is bounded -* Jitter, unfairness, and latency is reduced -* Less total memory is required to process the full input - -In the following example a JSON is parsed from standard input a line -at a time. Error codes are used instead. The function -[link json.ref.boost__json__stream_parser.finish `finish`] -is used to indicate the end of the input: - -[doc_parsing_8] - Sometimes a protocol may have a JSON text followed by data that is in a different format or specification. 
The JSON portion can still be parsed by using the function @@ -162,31 +140,27 @@ by using the function Upon success, the return value will indicate the number of characters consumed from the input, which will exclude the non-JSON characters: -[doc_parsing_9] +[doc_parsing_8] The parser instance may be constructed with parse options which allow some non-standard JSON extensions to be recognized: -[doc_parsing_10] +[doc_parsing_9] -[/-----------------------------------------------------------------------------] +[heading Streaming Parser] -[heading Parser Instance] +The __stream_parser__ implements a +[@https://en.wikipedia.org/wiki/Online_algorithm ['streaming algorithm]]; +it allows incremental processing of large JSON inputs using one or more +contiguous character buffers. The entire input JSON does not need to be +loaded into memory at once. A network server can use the streaming +interface to process incoming JSON in fixed-size amounts, providing +these benefits: -Instances of __parser__ and __stream_parser__ offer functionality beyond -what is available when using the __parse__ free functions: - -* More control over memory -* Streaming API, parse input JSON incrementally -* Improved performance when parsing multiple JSONs -* Ignore non-JSON content after the end of a JSON - -The parser implementation uses temporary storage space to accumulate -values during parsing. When using the __parse__ free functions, this -storage is allocated and freed in each call. However, by declaring -an instance of __parser__ or __stream_parser__, this temporary storage -can be reused when parsing more than one JSON, reducing the total -number of dynamic memory allocations. +* CPU consumption per I/O cycle is bounded +* Memory consumption per I/O cycle is bounded +* Jitter, unfairness, and latency are reduced +* Less total memory is required to process the full input To use the __stream_parser__, declare an instance. 
Then call [link json.ref.boost__json__stream_parser.write `write`] @@ -194,45 +168,15 @@ zero or more times with successive buffers representing the input JSON. When there are no more buffers, call [link json.ref.boost__json__stream_parser.finish `finish`]. The function -[link json.ref.boost__json__stream_parser.done `done`]. +[link json.ref.boost__json__stream_parser.done `done`] returns `true` after a successful call to `write` or `finish` -if parsing is complete. This example persists the parser instance -in a class member to reuse across calls: - -[doc_parsing_7] - -The parser interface allows a -[@https://en.wikipedia.org/wiki/Online_algorithm ['streaming algorithm]]; -it is possible to parse a JSON incrementally, in pieces. The entire -input JSON does not need to be loaded into memory at once first. This -interface requires more function calls than with the parse free functions. -A network server can use the streaming interface to process incoming -JSON in fixed-size amounts, providing these benefits: - -* CPU consumption per I/O cycle is bounded -* Memory consumption per I/O cycle is bounded -* Jitter, unfairness, and latency is reduced -* Less total memory is required to process the full input +if parsing is complete. In the following example a JSON is parsed from standard input a line at a time. Error codes are used instead. The function [link json.ref.boost__json__stream_parser.finish `finish`] is used to indicate the end of the input: -[doc_parsing_8] - -Sometimes a protocol may have a JSON text followed by data that is in -a different format or specification. The JSON portion can still be parsed -by using the function -[link json.ref.boost__json__parser.write_some `write_some`]. 
-Upon success, the return value will indicate the number of characters -consumed from the input, which will exclude the non-JSON characters: - -[doc_parsing_9] - -The parser instance may be constructed with parse options which -allow some non-standard JSON extensions to be recognized: - [doc_parsing_10] [/-----------------------------------------------------------------------------] @@ -258,11 +202,13 @@ for this temporary storage area may be specified. Otherwise, the default memory resource is used. In addition to a memory resource, the parser can make use of a caller-owned buffer for temporary storage. This can help avoid dynamic allocations for small inputs. -The following example uses a 4kb temporary buffer for the parser, -and falls back to the default memory resource if needed: +The following example uses a four kilobyte temporary buffer for +the parser, and falls back to the default memory resource if needed: [doc_parsing_12] +[section Avoiding Dynamic Allocations] + Through careful specification of buffers and memory resources, it is possible to eliminate all dynamic allocation completely when parsing JSON, for the case where the entire JSON is available in @@ -270,6 +216,8 @@ a single character buffer, as shown here: [doc_parsing_13] +[endsect] + [/-----------------------------------------------------------------------------] [heading Custom Parsers] diff --git a/test/doc_parsing.cpp b/test/doc_parsing.cpp index 114330de..820732d9 100644 --- a/test/doc_parsing.cpp +++ b/test/doc_parsing.cpp @@ -103,7 +103,7 @@ class connection public: void do_read( string_view s ) // called for each complete message from the network { - p_.reset(); // start parsing a new JSON + p_.reset(); // start parsing a new JSON using the default resource p_.write( s ); // parse the buffer, using exceptions to indicate error do_rpc( p_.release() ); // process the command } @@ -114,7 +114,37 @@ public: //---------------------------------------------------------- +static void set2() { + 
+//---------------------------------------------------------- +{ //[doc_parsing_8 +stream_parser p; +error_code ec; +string_view s = "[1,2,3] %HOME%"; +std::size_t n = p.write_some( s, ec ); +assert( ! ec && p.done() && n == 8 ); +s = s.substr( n ); +value jv = p.release(); +assert( s == "%HOME%" ); +//] +} +//---------------------------------------------------------- +{ +//[doc_parsing_9 +parse_options opt; // All extensions default to off +opt.allow_comments = true; // Permit C and C++ style comments to appear in whitespace +opt.allow_trailing_commas = true; // Allow an additional trailing comma in object and array element lists +opt.allow_invalid_utf8 = true; // Skip utf-8 validation of keys and strings +stream_parser p( storage_ptr(), opt ); // The stream_parser will use the options +//] +} +//---------------------------------------------------------- + +} // set2 + +//---------------------------------------------------------- +//[doc_parsing_10 value read_json( std::istream& is, error_code& ec ) { stream_parser p; @@ -134,31 +164,8 @@ value read_json( std::istream& is, error_code& ec ) //---------------------------------------------------------- -static void set2() { +static void set3() { -//---------------------------------------------------------- -{ -//[doc_parsing_9 -stream_parser p; -error_code ec; -string_view s = "[1,2,3] %HOME%"; -std::size_t n = p.write_some( s, ec ); -assert( ! 
ec && p.done() && n == 8 ); -s = s.substr( n ); -value jv = p.release(); -assert( s == "%HOME%" ); -//] -} -//---------------------------------------------------------- -{ -//[doc_parsing_10 -parse_options opt; // All extensions default to off -opt.allow_comments = true; // Permit C and C++ style comments to appear in whitespace -opt.allow_trailing_commas = true; // Allow an additional trailing comma in object and array element lists -opt.allow_invalid_utf8 = true; // Skip utf-8 validation of keys and strings -stream_parser p( storage_ptr(), opt ); // The stream_parser will use the options -//] -} //---------------------------------------------------------- { //[doc_parsing_11 @@ -185,7 +192,7 @@ stream_parser p( } //---------------------------------------------------------- -} // set2 +} // set3 //---------------------------------------------------------- @@ -210,17 +217,15 @@ template< class Handler > void do_rpc( string_view s, Handler&& handler ) { unsigned char temp[ 4096 ]; // The stream_parser will use this storage for its temporary needs - stream_parser p( // Construct a strict stream_parser using the temp buffer and no dynamic memory - get_null_resource(), // The null resource guarantees we will never dynamically allocate + parser p( // Construct a strict parser using the temp buffer and no dynamic memory + get_null_resource(), // The null resource never dynamically allocates memory parse_options(), // Default constructed parse options allow only standard JSON temp ); unsigned char buf[ 16384 ]; // Now we need a buffer to hold the actual JSON values static_resource mr2( buf ); // The static resource is monotonic, using only a caller-provided buffer p.reset( &mr2 ); // Use the static resource for producing the value - p.write( s ); // Parse the entire string we received from the network client - p.finish(); // Inform the stream_parser that the complete input has been provided // Retrieve the value and invoke the handler with it. 
// The value will use `buf` for storage. The handler @@ -240,6 +245,7 @@ public: { (void)&set1; (void)&set2; + (void)&set3; } };