// Copyright (C) 2020 T. Zachary Laine // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) //[ extended_callback_parsing_json_example #include #include #include #include #include namespace json { namespace bp = ::boost::parser; using namespace bp::literals; template struct excessive_nesting : std::runtime_error { excessive_nesting(Iter it) : runtime_error("excessive_nesting"), iter(it) {} Iter iter; }; struct global_state { int recursive_open_count = 0; int max_recursive_open_count = 0; }; struct double_escape_locals { int first_surrogate = 0; }; bp::rule const ws = "whitespace"; bp::rule const string_char = "code point (code points <= U+001F must be escaped)"; bp::rule const hex_4 = "four hexadecimal digits"; bp::rule const escape_seq = "\\uXXXX hexadecimal escape sequence"; bp::rule const escape_double_seq = "\\uXXXX hexadecimal escape sequence"; bp::rule const single_escaped_char = "'\"', '\\', '/', 'b', 'f', 'n', 'r', or 't'"; bp::callback_rule const null = "null"; // Since we don't create polymorphic values in this parse, we need to be // able to report that we parsed a bool, so we need a callback rule for // this. bp::callback_rule const bool_p = "boolean"; bp::callback_rule const string = "string"; bp::callback_rule const number = "number"; // object_element is broken up into the key (object_element_key) and the // whole thing (object_element). This was done because the value after // the ':' may have many parts. It may be an array, for example. This // implies that we need to report that we have the string part of the // object-element, and that the rest -- the value -- is coming. bp::callback_rule const object_element_key = "string"; bp::rule const object_element = "object-element"; // object gets broken up too, to enable the reporting of the beginning and // end of the object when '{' or '}' is parsed, respectively. The same // thing is done for array, below. bp::callback_rule const object_open = "'{'"; bp::callback_rule const object_close = "'}'"; bp::rule const object = "object"; bp::callback_rule const array_open = "'['"; bp::callback_rule const array_close = "']'"; bp::rule const array = "array"; // value no longer produces an attribute, and it has no callback either. // Each individual possible kind of value (string, array, etc.) gets // reported separately. bp::rule const value = "value"; // Since we use these tag types as function parameters in the callbacks, // they need to be complete types. class null_tag {}; class bool_tag {}; class string_tag {}; class number_tag {}; class object_element_key_tag {}; class object_open_tag {}; class object_close_tag {}; class array_open_tag {}; class array_close_tag {}; auto const ws_def = '\x09'_l | '\x0a' | '\x0d' | '\x20'; auto first_hex_escape = [](auto & ctx) { auto & locals = _locals(ctx); uint32_t const cu = _attr(ctx); if (!boost::parser::detail::text::high_surrogate(cu)) _pass(ctx) = false; else locals.first_surrogate = cu; }; auto second_hex_escape = [](auto & ctx) { auto & locals = _locals(ctx); uint32_t const cu = _attr(ctx); if (!boost::parser::detail::text::low_surrogate(cu)) { _pass(ctx) = false; } else { uint32_t const high_surrogate_min = 0xd800; uint32_t const low_surrogate_min = 0xdc00; uint32_t const surrogate_offset = 0x10000 - (high_surrogate_min << 10) - low_surrogate_min; uint32_t const first_cu = locals.first_surrogate; _val(ctx) = (first_cu << 10) + cu + surrogate_offset; } }; auto const hex_4_def = boost::parser::uint_.base<16>().digits<4>(); auto const escape_seq_def = "\\u" > hex_4; auto const escape_double_seq_def = escape_seq[first_hex_escape] >> escape_seq[second_hex_escape]; bp::symbols const single_escaped_char_def = { {"\"", 0x0022u}, {"\\", 0x005cu}, {"/", 0x002fu}, {"b", 0x0008u}, {"f", 0x000cu}, {"n", 0x000au}, {"r", 0x000du}, {"t", 0x0009u}}; auto const string_char_def = escape_double_seq | escape_seq | ('\\'_l > single_escaped_char) | (bp::cp - bp::char_(0x0000u, 0x001fu)); auto const null_def = "null"_l; auto const bool_p_def = bp::bool_; auto const string_def = bp::lexeme['"' >> *(string_char - '"') > '"']; auto parse_double = [](auto & ctx) { auto const cp_range = _attr(ctx); auto cp_first = cp_range.begin(); auto const cp_last = cp_range.end(); auto const result = bp::prefix_parse(cp_first, cp_last, bp::double_); if (result) { _val(ctx) = *result; } else { // This would be more efficient if we used // boost::container::small_vector, or std::inplace_vector from // C++26. std::vector chars(cp_first, cp_last); auto const chars_first = &*chars.begin(); auto chars_last = chars_first + chars.size(); _val(ctx) = std::strtod(chars_first, &chars_last); } }; auto const number_def = bp::raw[bp::lexeme [-bp::char_('-') >> (bp::char_('1', '9') >> *bp::digit | bp::char_('0')) >> -(bp::char_('.') >> +bp::digit) >> -(bp::char_("eE") >> -bp::char_("+-") >> +bp::digit)]] [parse_double]; // The object_element_key parser is exactly the same as the string parser. // Note that we did *not* use string here, though; we used string_def. If // we had used string, its callback would have been called first, and // worse still, since it moves its attribute, the callback for // object_element_key would always report the empty string, because the // string callback would have consumed it first. auto const object_element_key_def = string_def; auto const object_element_def = object_element_key > ':' > value; // This is a very straightforward way to write object_def when we know we // don't care about attribute-generating (non-callback) parsing. If we // wanted to support both modes in one parser definition, we could have // written: // auto const object_open_def = eps; // auto const object_close_def = eps; // auto const object_def = '{' >> object_open >> // -(object_element % ',') > // '}' >> object_close; auto const object_open_def = '{'_l; auto const object_close_def = '}'_l; auto const object_def = object_open >> -(object_element % ',') > object_close; auto const array_open_def = '['_l; auto const array_close_def = ']'_l; auto const array_def = array_open >> -(value % ',') > array_close; auto const value_def = number | bool_p | null | string | array | object; BOOST_PARSER_DEFINE_RULES( ws, hex_4, escape_seq, escape_double_seq, single_escaped_char, string_char, null, bool_p, string, number, object_element_key, object_element, object_open, object_close, object, array_open, array_close, array, value); // The parse function loses its attribute from the return type; now the // return type is just bool. template bool parse( std::string_view str, std::string_view filename, Callbacks const & callbacks, int max_recursion = 512) { auto const range = boost::parser::as_utf32(str); using iter_t = decltype(range.begin()); if (max_recursion <= 0) max_recursion = INT_MAX; global_state globals{0, max_recursion}; // This is a different error handler from the json.cpp example, just // to show different options. bp::stream_error_handler error_handler(filename); auto const parser = bp::with_error_handler( bp::with_globals(value, globals), error_handler); try { // This is identical to the parse() call in json.cpp, except that // it is callback_parse() instead, and it takes the callbacks // parameter. return bp::callback_parse(range, parser, ws, callbacks); } catch (excessive_nesting const & e) { std::string const message = "error: Exceeded maximum number (" + std::to_string(max_recursion) + ") of open arrays and/or objects"; bp::write_formatted_message( std::cout, filename, range.begin(), e.iter, range.end(), message); } return {}; } } std::string file_slurp(std::ifstream & ifs) { std::string retval; while (ifs) { char const c = ifs.get(); retval += c; } if (!retval.empty() && retval.back() == -1) retval.pop_back(); return retval; } // This is our callbacks-struct. It has a callback for each of the kinds of // callback rules in our parser. If one were missing, you'd get a pretty // nasty template instantiation error. Note that these are all const members; // callback_parse() takes the callbacks object by constant reference. struct json_callbacks { void operator()(json::null_tag) const { std::cout << "JSON null value\n"; } void operator()(json::bool_tag, bool b) const { indent(); std::cout << "JSON bool " << (b ? "true" : "false") << "\n"; } void operator()(json::string_tag, std::string s) const { indent(); std::cout << "JSON string \"" << s << "\"\n"; } void operator()(json::number_tag, double d) const { indent(); std::cout << "JSON number " << d << "\n"; } void operator()(json::object_element_key_tag, std::string key) const { indent(); std::cout << "JSON object element with key \"" << key << "\" and value...\n"; } void operator()(json::object_open_tag) const { indent(1); std::cout << "Beginning of JSON object.\n"; } void operator()(json::object_close_tag) const { indent(-1); std::cout << "End of JSON object.\n"; } void operator()(json::array_open_tag) const { indent(1); std::cout << "Beginning of JSON array.\n"; } void operator()(json::array_close_tag) const { indent(-1); std::cout << "End of JSON array.\n"; } void indent(int level_bump = 0) const { if (level_bump < 0) indent_.resize(indent_.size() - 2); std::cout << indent_; if (0 < level_bump) indent_ += " "; } mutable std::string indent_; }; int main(int argc, char * argv[]) { if (argc < 2) { std::cerr << "A filename to parse is required.\n"; exit(1); } std::ifstream ifs(argv[1]); if (!ifs) { std::cerr << "Unable to read file '" << argv[1] << "'.\n"; exit(1); } std::string const file_contents = file_slurp(ifs); bool success = json::parse(file_contents, argv[1], json_callbacks{}); if (success) { std::cout << "Parse successful!\n"; } else { std::cerr << "Parse failure.\n"; exit(1); } return 0; } //]