diff --git a/.travis.yml b/.travis.yml index c4ed79c0..60c8faf9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -118,6 +118,7 @@ jobs: env: - COMMENT=codecov.io - B2_CXXSTD=11 + - B2_CXXFLAGS="cxxflags=-fno-inline" - B2_TOOLSET=gcc-8 - B2_DEFINES="define=BOOST_DISABLE_ASSERTS define=BOOST_NO_STRESS_TEST=1" addons: *gcc-8 diff --git a/include/boost/json/assign_string.hpp b/include/boost/json/assign_string.hpp index 021da7c5..d3058fca 100644 --- a/include/boost/json/assign_string.hpp +++ b/include/boost/json/assign_string.hpp @@ -29,8 +29,8 @@ from_json( value const& v) { if(! v.is_string()) - throw system_error( - error::expected_string); + BOOST_THROW_EXCEPTION( + system_error(error::not_string)); auto& s= v.as_string(); t.assign(s.data(), s.size()); } diff --git a/include/boost/json/assign_vector.hpp b/include/boost/json/assign_vector.hpp index 788a8095..8d0ad4f7 100644 --- a/include/boost/json/assign_vector.hpp +++ b/include/boost/json/assign_vector.hpp @@ -31,8 +31,8 @@ from_json( value const& v) { if(! v.is_array()) - throw system_error( - error::expected_array); + BOOST_THROW_EXCEPTION( + system_error(error::not_array)); auto& arr = v.as_array(); t.resize(0); t.resize(arr.size()); diff --git a/include/boost/json/basic_parser.hpp b/include/boost/json/basic_parser.hpp index bd6fce15..92adae0e 100644 --- a/include/boost/json/basic_parser.hpp +++ b/include/boost/json/basic_parser.hpp @@ -40,6 +40,8 @@ class basic_parser state, stack_capacity> stack_; number::mantissa_type n_mant_; number::exponent_type n_exp_; + long u0_; + unsigned short u_; bool n_neg_; bool n_exp_neg_; bool is_key_; diff --git a/include/boost/json/detail/basic_parser.hpp b/include/boost/json/detail/basic_parser.hpp index 537f5568..af9f01e5 100644 --- a/include/boost/json/detail/basic_parser.hpp +++ b/include/boost/json/detail/basic_parser.hpp @@ -38,6 +38,56 @@ struct parser_base { return static_cast(c) < 32; } + + static + short + hex_digit(char c) noexcept + { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return 10 + c - 'A'; + if (c >= 'a' && c <= 'f') + return 10 + c - 'a'; + return -1; + } + + static + int + utf8_encode( + char* dest, + unsigned long cp) + { + if(cp < 0x80) + { + dest[0] = static_cast(cp); + return 1; + } + + if(cp < 0x800) + { + dest[0] = static_cast( (cp >> 6) | 0xc0); + dest[1] = static_cast( (cp & 0x3f) | 0x80); + return 2; + } + + if(cp < 0x10000) + { + dest[0] = static_cast( (cp >> 12) | 0xe0); + dest[1] = static_cast(((cp >> 6) & 0x3f) | 0x80); + dest[2] = static_cast( (cp & 0x3f) | 0x80); + return 3; + } + + { + dest[0] = static_cast( (cp >> 18) | 0xf0); + dest[1] = static_cast(((cp >> 12) & 0x3f) | 0x80); + dest[2] = static_cast(((cp >> 6) & 0x3f) | 0x80); + dest[3] = static_cast( (cp & 0x3f) | 0x80); + return 4; + } + } + }; } // detail diff --git a/include/boost/json/detail/buffer.hpp b/include/boost/json/detail/buffer.hpp new file mode 100644 index 00000000..89f029e2 --- /dev/null +++ b/include/boost/json/detail/buffer.hpp @@ -0,0 +1,116 @@ +// +// Copyright (c) 2018-2019 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/vinniefalco/json +// + +#ifndef BOOST_JSON_DETAIL_BUFFER_HPP +#define BOOST_JSON_DETAIL_BUFFER_HPP + +#include +#include + +namespace boost { +namespace json { +namespace detail { + +// A simple string-like temporary static buffer +template +class buffer +{ + char buf_[N]; + unsigned int size_ = 0; + +public: + using size_type = unsigned int; + + buffer() = default; + + string_view + get() const noexcept + { + return {buf_, size_}; + } + + operator string_view() const noexcept + { + return get(); + } + + char const* + data() const noexcept + { + return buf_; + } + + size_type + size() const noexcept + { + return size_; + } + + size_type + max_size() const noexcept + { + return N; + } + + void + clear() noexcept + { + size_ = 0; + } + + void + push_back(char ch) noexcept + { + buf_[size_++] = ch; + } + + // append valid 32-bit code point as utf8 + void + append_utf8( + unsigned long cp) noexcept + { + auto dest = buf_ + size_; + if(cp < 0x80) + { + dest[0] = static_cast(cp); + size_ += 1; + return; + } + + if(cp < 0x800) + { + dest[0] = static_cast( (cp >> 6) | 0xc0); + dest[1] = static_cast( (cp & 0x3f) | 0x80); + size_ += 2; + return; + } + + if(cp < 0x10000) + { + dest[0] = static_cast( (cp >> 12) | 0xe0); + dest[1] = static_cast(((cp >> 6) & 0x3f) | 0x80); + dest[2] = static_cast( (cp & 0x3f) | 0x80); + size_ += 3; + } + + { + dest[0] = static_cast( (cp >> 18) | 0xf0); + dest[1] = static_cast(((cp >> 12) & 0x3f) | 0x80); + dest[2] = static_cast(((cp >> 6) & 0x3f) | 0x80); + dest[3] = static_cast( (cp & 0x3f) | 0x80); + size_ += 4; + } + } +}; + +} // detail +} // json +} // boost + +#endif diff --git a/include/boost/json/detail/stack.hpp b/include/boost/json/detail/stack.hpp index 9825c5ad..c809466a 100644 --- a/include/boost/json/detail/stack.hpp +++ b/include/boost/json/detail/stack.hpp @@ -147,7 +147,7 @@ public: } void - push_front(T const& t) + push(T const& t) { if(n_ < N) ::new(&base()[n_]) T(t); @@ -170,7 +170,7 @@ public: } void - pop_front() + pop() { BOOST_ASSERT(n_ > 0); if(! v_.empty()) diff --git a/include/boost/json/error.hpp b/include/boost/json/error.hpp index 8f2544af..4d97549f 100644 --- a/include/boost/json/error.hpp +++ b/include/boost/json/error.hpp @@ -32,44 +32,101 @@ using error_condition = boost::system::error_condition; /// Error codes returned by JSON operations enum class error { - /// The serialized JSON object contains a syntax error + /// syntax error syntax = 1, - /// Unexpected extra data encountered while parsing + /// extra data extra_data, - /// A mantissa overflowed while parsing + /// mantissa overflow mantissa_overflow, - /// The parser encountered an exponent that overflowed + /// exponent too large exponent_overflow, - /// The parser's maximum depth limit was reached + /// too deep too_deep, - /// Expected a value of kind object - expected_object, + /// illegal character for value + illegal_char, - /// Expected a value of kind array - expected_array, + /// illegal control character + illegal_control_char, + + /// illegal character in escape sequence + illegal_escape_char, - /// Expected a value of kind string - expected_string, + /// illegal extra digits in number + illegal_extra_digits, - /// Expect a value of kind number - expected_number, + /// illegal extra characters + illegal_extra_chars, - /// Expected a value of kind boolean - expected_bool, + /// illegal leading surrogate + illegal_leading_surrogate, - /// Expected a value of kind boolean + /// illegal trailing surrogate + illegal_trailing_surrogate, + + /// expected comma + expected_comma, + + /// expected colon + expected_colon, + + /// expected quotes + expected_quotes, + + /// expected hex digit + expected_hex_digit, + + /// expected utf16 escape + expected_utf16_escape, + + /// expected mantissa + expected_mantissa, + + /// expected fractional part of mantissa + expected_fraction, + + /// expected exponent here + expected_exponent, + + /// expected 'true' + expected_true, + + /// expected 'false' + expected_false, + + /// expected 'null' expected_null, - /// An integer assignment would overflow + /// not an object + not_object, + + /// not an array + not_array, + + /// not a string + not_string, + + /// not a number + not_number, + + /// not a boolean + not_bool, + + /// not a null + not_null, + + /// integer overflow integer_overflow, - /// The key was not found in the object - key_not_found + /// key not found + key_not_found, + + /// test failure + test_failure }; /// Error conditions corresponding to JSON errors diff --git a/include/boost/json/impl/basic_parser.ipp b/include/boost/json/impl/basic_parser.ipp index 86025726..77dedf73 100644 --- a/include/boost/json/impl/basic_parser.ipp +++ b/include/boost/json/impl/basic_parser.ipp @@ -12,6 +12,7 @@ #include #include +#include #include namespace boost { @@ -57,18 +58,21 @@ namespace json { enum class basic_parser::state : char { json, - element, ws, value, - object1, object2, object3, object4, colon, - array1, array2, array3, array4, + object1, object2, colon, + array1, array2, string1, string2, string3, string4, - true1, true2, true3, true4, - false1, false2, false3, false4, false5, - null1, null2, null3, null4, + true1, true2, true3, + false1, false2, false3, false4, + null1, null2, null3, - number, number_mant1, number_mant2, + u_esc1, u_esc2, u_esc3, u_esc4, + u_pair1, u_pair2, + u_surr, + + number_mant1, number_mant2, number_fract1, number_fract2, number_fract3, number_exp, number_exp_sign, number_exp_digits1, number_exp_digits2, number_end, @@ -94,11 +98,14 @@ basic_parser:: reset() { stack_.clear(); - stack_.push_front(state::end); - stack_.push_front(state::json); + stack_.push(state::end); + stack_.push(state::json); + is_key_ = false; + n_neg_ = false; + u0_ = -1; } -//------------------------------------------------------------------------------ +//---------------------------------------------------------- // Append the digit to the // value, which must be unsigned. @@ -148,55 +155,7 @@ append_digit( return true; } -//------------------------------------------------------------------------------ - -void -basic_parser:: -write_eof(error_code& ec) -{ - // write a null, this is invalid no matter - // what state we are in, to get a descriptive - // error. - // - // VFALCO we might want to return error::partial_data - - auto const fail = - [this, &ec] - { - char c = 0; - write_some(&c, 1, ec); - BOOST_ASSERT(ec); - }; - - while(stack_.front() != state::end) - { - // pop all states that - // allow "" (empty string) - switch(stack_.front()) - { - case state::number_mant2: - case state::number_fract1: - case state::number_fract3: - case state::number_exp: - case state::number_exp_digits2: - stack_.front() = state::number_end; - write_some(nullptr, 0, ec); - if(ec) - return; - break; - - case state::ws: - stack_.pop_front(); - break; - - default: - return fail(); - } - } - ec = {}; -} - -//------------------------------------------------------------------------------ +//---------------------------------------------------------- std::size_t basic_parser:: @@ -209,21 +168,19 @@ write_some( auto n = size; auto const p0 = p; auto const p1 = p0 + n; - std::string temp; // VFALCO string bad! - temp.reserve(4096); + detail::buffer<2048> temp; ec = {}; BOOST_ASSERT(stack_.front() != state::end); auto const maybe_flush = [&] { - if(temp.size() != temp.max_size()) + // need 4 chars for largest utf8 code point + if(temp.size() < temp.max_size() - 4) return; if(is_key_) - this->on_key_data( - {temp.data(), temp.size()}, ec); + this->on_key_data(temp, ec); else - this->on_string_data( - {temp.data(), temp.size()}, ec); + this->on_string_data(temp, ec); temp.clear(); }; loop: @@ -233,23 +190,19 @@ loop: this->on_document_begin(ec); if(ec) goto finish; - stack_.front() = state::element; - temp.clear(); - is_key_ = false; - goto loop; - - case state::element: - stack_.front() = state::ws; - stack_.push_front(state::value); - stack_.push_front(state::ws); - goto loop; + stack_.pop(); + stack_.push(state::ws); + stack_.push(state::value); + stack_.push(state::ws); + BOOST_FALLTHROUGH; case state::ws: +loop_ws: while(p < p1) { if(! is_ws(*p)) { - stack_.pop_front(); + stack_.pop(); goto loop; } ++p; @@ -258,51 +211,66 @@ loop: case state::value: { - if(p >= p1) - break; + BOOST_ASSERT(p < p1); switch(*p) { // object case '{': + ++p; stack_.front() = state::object1; - goto loop; + stack_.push(state::ws); + this->on_object_begin(ec); + if(ec) + goto finish; + goto loop_ws; // array case '[': ++p; stack_.front() = state::array1; + stack_.push(state::ws); this->on_array_begin(ec); - goto loop; + if(ec) + goto finish; + goto loop_ws; // string case '"': stack_.front() = state::string1; - goto loop; + goto loop_string; // number + case '-': + ++p; + n_neg_ = true; + BOOST_FALLTHROUGH; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - case '-': - stack_.front() = state::number; - goto loop; + n_mant_ = 0; + n_exp_ = 0; + stack_.front() = state::number_mant1; + goto loop_number; // true case 't': if(p + 4 <= p1) { if( - p[1] != 'r' || - p[2] != 'u' || - p[3] != 'e') + p[1] == 'r' && + p[2] == 'u' && + p[3] == 'e') { - ec = error::syntax; - goto finish; + p = p + 4; + this->on_bool(true, ec); + if(ec) + goto finish; + stack_.pop(); + goto loop; } - p = p + 4; - stack_.front() = state::true4; - goto loop; + ec = error::expected_true; + goto finish; } ++p; stack_.front() = state::true1; @@ -313,17 +281,20 @@ loop: if(p + 5 <= p1) { if( - p[1] != 'a' || - p[2] != 'l' || - p[3] != 's' || - p[4] != 'e') + p[1] == 'a' && + p[2] == 'l' && + p[3] == 's' && + p[4] == 'e') { - ec = error::syntax; - goto finish; + p = p + 5; + this->on_bool(false, ec); + if(ec) + goto finish; + stack_.pop(); + goto loop; } - p = p + 5; - stack_.front() = state::false5; - goto loop; + ec = error::expected_false; + goto finish; } ++p; stack_.front() = state::false1; @@ -334,113 +305,118 @@ loop: if(p + 4 <= p1) { if( - p[1] != 'u' || - p[2] != 'l' || - p[3] != 'l') + p[1] == 'u' && + p[2] == 'l' && + p[3] == 'l') { - ec = error::syntax; - goto finish; + p = p + 4; + this->on_null(ec); + if(ec) + goto finish; + stack_.pop(); + goto loop; } - p = p + 4; - stack_.front() = state::null4; - goto loop; + ec = error::expected_null; + goto finish; } ++p; stack_.front() = state::null1; goto loop; default: - ec = error::syntax; + ec = error::illegal_char; goto finish; } break; } - //-------------------------------------------------------------------------- + //------------------------------------------------------ // // object // - // beginning of object - case state::object1: - BOOST_ASSERT(*p == '{'); - ++p; - this->on_object_begin(ec); - if(ec) - goto finish; - stack_.front() = state::object2; - stack_.push_front(state::ws); - goto loop; - // first key or end of object - case state::object2: - if(p >= p1) - break; + case state::object1: + BOOST_ASSERT(p < p1); if(*p == '}') { ++p; - stack_.front() = state::object4; + this->on_object_end(ec); + if(ec) + goto finish; + stack_.pop(); goto loop; } - stack_.front() = state::object3; - stack_.push_front(state::element); - stack_.push_front(state::colon); - stack_.push_front(state::ws); - stack_.push_front(state::string1); + stack_.pop(); + stack_.push(state::object2); + stack_.push(state::ws); + stack_.push(state::value); + stack_.push(state::ws); + stack_.push(state::colon); + stack_.push(state::ws); + stack_.push(state::string1); is_key_ = true; goto loop; - case state::object3: - if(p >= p1) - break; + case state::object2: + BOOST_ASSERT(p < p1); if(*p == '}') { ++p; - stack_.front() = state::object4; + this->on_object_end(ec); + if(ec) + goto finish; + stack_.pop(); goto loop; } if(*p != ',') { - ec = error::syntax; + ec = error::expected_comma; goto finish; } ++p; - stack_.front() = state::object3; - stack_.push_front(state::element); - stack_.push_front(state::colon); - stack_.push_front(state::ws); - stack_.push_front(state::string1); - stack_.push_front(state::ws); + stack_.push(state::ws); + stack_.push(state::value); + stack_.push(state::ws); + stack_.push(state::colon); + stack_.push(state::ws); + stack_.push(state::string1); + stack_.push(state::ws); is_key_ = true; - goto loop; - - case state::object4: - this->on_object_end(ec); - if(ec) - goto finish; - stack_.pop_front(); - goto loop; + goto loop_ws; case state::colon: - if(p >= p1) - break; + BOOST_ASSERT(p < p1); if(*p != ':') { - ec = error::syntax; + ec = error::expected_colon; goto finish; } ++p; - stack_.pop_front(); + stack_.pop(); goto loop; - //-------------------------------------------------------------------------- + //------------------------------------------------------ // // array // case state::array1: - stack_.front() = state::array2; - stack_.push_front(state::ws); + BOOST_ASSERT(p < p1); + if(*p == ']') + { + ++p; + this->on_array_end(ec); + if(ec) + goto finish; + stack_.pop(); + goto loop; + } + stack_.pop(); + stack_.push(state::array2); + stack_.push(state::ws); + stack_.push(state::value); + stack_.push(state::ws); goto loop; case state::array2: @@ -449,63 +425,49 @@ loop: if(*p == ']') { ++p; - stack_.front() = state::array4; - goto loop; - } - stack_.front() = state::array3; - stack_.push_front(state::element); - goto loop; - - case state::array3: - if(p >= p1) - break; - if(*p == ']') - { - ++p; - stack_.front() = state::array4; + this->on_array_end(ec); + if(ec) + goto finish; + stack_.pop(); goto loop; } if(*p != ',') { - ec = error::syntax; + ec = error::expected_comma; goto finish; } ++p; - stack_.front() = state::array3; - stack_.push_front(state::element); - stack_.push_front(state::ws); + stack_.push(state::ws); + stack_.push(state::value); + stack_.push(state::ws); goto loop; - case state::array4: - this->on_array_end(ec); - if(ec) - goto finish; - stack_.pop_front(); - goto loop; - - //-------------------------------------------------------------------------- + //------------------------------------------------------ // // string // // double quote opening string case state::string1: +loop_string: if(p >= p1) break; if(*p != '\"') { - ec = error::syntax; + ec = error::expected_quotes; goto finish; } ++p; - stack_.front() = state::string2; + stack_.pop(); + stack_.push(state::string2); BOOST_FALLTHROUGH; - //goto loop; // characters // No copies here case state::string2: { + if(p >= p1) + break; auto const start = p; while(p < p1) { @@ -523,33 +485,37 @@ loop: if(ec) goto finish; is_key_ = false; - stack_.pop_front(); + stack_.pop(); goto loop; } if(*p == '\\') { - if(is_key_) - this->on_key_data({start, - static_cast( - p - start)}, ec); - else - this->on_string_data({start, - static_cast( - p - start)}, ec); - ++p; + if(p > start) + { + if(is_key_) + this->on_key_data({start, + static_cast( + p - start)}, ec); + else + this->on_string_data({start, + static_cast( + p - start)}, ec); + } if(ec) goto finish; + ++p; stack_.front() = state::string4; goto loop; } if(is_control(*p)) { - ec = error::syntax; + ec = error::illegal_control_char; goto finish; } // TODO UTF-8? ++p; } + BOOST_ASSERT(p != start); if(is_key_) this->on_key_data({start, static_cast( @@ -572,17 +538,15 @@ loop: if(*p == '\"') { if(is_key_) - this->on_key_end({temp.data(), - temp.size()}, ec); + this->on_key_end(temp, ec); else - this->on_string_end({temp.data(), - temp.size()}, ec); + this->on_string_end(temp, ec); ++p; if(ec) goto finish; temp.clear(); is_key_ = false; - stack_.pop_front(); + stack_.pop(); goto loop; } if(*p == '\\') @@ -593,10 +557,9 @@ loop: } if(is_control(*p)) { - ec = error::syntax; + ec = error::illegal_control_char; goto finish; } - // TODO UTF-8 maybe_flush(); temp.push_back(*p++); } @@ -650,18 +613,176 @@ loop: break; case 'u': - BOOST_ASSERT(false); - break; + ++p; + stack_.front() = state::string3; + stack_.push(state::u_esc1); + goto loop; default: - ec = error::syntax; + ec = error::illegal_escape_char; goto finish; } ++p; stack_.front() = state::string3; goto loop; - //-------------------------------------------------------------------------- + // utf16 escape, got "\u" already + case state::u_esc1: + { + if(p >= p1) + break; + auto d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ = d << 12; + if(p + 3 <= p1) + { + // fast path + d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d << 8; + d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d << 4; + d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d; + stack_.front() = state::u_surr; + goto loop; + } + stack_.front() = state::u_esc2; + goto loop; + } + + case state::u_esc2: + { + if(p >= p1) + break; + auto d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d << 8; + stack_.front() = state::u_esc3; + goto loop; + } + + case state::u_esc3: + { + if(p >= p1) + break; + auto d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d << 4; + stack_.front() = state::u_esc4; + goto loop; + } + + case state::u_esc4: + { + if(p >= p1) + break; + auto d = hex_digit(*p++); + if(d == -1) + { + ec = error::expected_hex_digit; + goto finish; + } + u_ += d; + stack_.front() = state::u_surr; + goto loop; + } + + // handles 1 or 2 surrogates + case state::u_surr: + { + // one code unit + if(u0_ == -1) + { + if( u_ >= 0xd800) + { + if(u_ <= 0xdbff) + { + // need 2nd surrogate + u0_ = u_; + stack_.front() = state::u_pair1; + goto loop; + } + else if(u_ <= 0xdfff) + { + ec = error::illegal_leading_surrogate; + goto finish; + } + } + + maybe_flush(); + temp.append_utf8(u_); + stack_.pop(); + goto loop; + } + // both code units + if( u_ < 0xdc00 || + u_ > 0xdfff) + { + ec = error::illegal_trailing_surrogate; + goto finish; + } + unsigned long cp = + ((u0_ - 0xd800) << 10) + + (u_ - 0xdc00); + temp.append_utf8(cp); + u0_ = -1; + stack_.pop(); + goto loop; + } + + // second utf16 surrogate + case state::u_pair1: + if(p >= p1) + break; + if(*p != '\\') + { + ec = error::expected_utf16_escape; + goto finish; + } + ++p; + stack_.front() = state::u_pair2; + goto loop; + + case state::u_pair2: + if(p >= p1) + break; + if(*p != 'u') + { + ec = error::expected_utf16_escape; + goto finish; + } + ++p; + stack_.front() = state::u_esc1; + goto loop; + + //------------------------------------------------------ // // true @@ -672,7 +793,7 @@ loop: break; if(*p != 'r') { - ec = error::syntax; + ec = error::expected_true; goto finish; } ++p; @@ -684,7 +805,7 @@ loop: break; if(*p != 'u') { - ec = error::syntax; + ec = error::expected_true; goto finish; } ++p; @@ -696,18 +817,14 @@ loop: break; if(*p != 'e') { - ec = error::syntax; + ec = error::expected_true; goto finish; } ++p; - stack_.front() = state::true4; - BOOST_FALLTHROUGH; - - case state::true4: this->on_bool(true, ec); if(ec) goto finish; - stack_.pop_front(); + stack_.pop(); goto loop; // @@ -719,7 +836,7 @@ loop: break; if(*p != 'a') { - ec = error::syntax; + ec = error::expected_false; goto finish; } ++p; @@ -731,7 +848,7 @@ loop: break; if(*p != 'l') { - ec = error::syntax; + ec = error::expected_false; goto finish; } ++p; @@ -743,7 +860,7 @@ loop: break; if(*p != 's') { - ec = error::syntax; + ec = error::expected_false; goto finish; } ++p; @@ -755,18 +872,14 @@ loop: break; if(*p != 'e') { - ec = error::syntax; + ec = error::expected_false; goto finish; } ++p; - stack_.front() = state::false5; - BOOST_FALLTHROUGH; - - case state::false5: this->on_bool(false, ec); if(ec) goto finish; - stack_.pop_front(); + stack_.pop(); goto loop; // @@ -778,7 +891,7 @@ loop: break; if(*p != 'u') { - ec = error::syntax; + ec = error::expected_null; goto finish; } ++p; @@ -790,7 +903,7 @@ loop: break; if(*p != 'l') { - ec = error::syntax; + ec = error::expected_null; goto finish; } ++p; @@ -802,47 +915,28 @@ loop: break; if(*p != 'l') { - ec = error::syntax; + ec = error::expected_null; goto finish; } ++p; - stack_.front() = state::null4; - BOOST_FALLTHROUGH; - - case state::null4: this->on_null(ec); if(ec) goto finish; - stack_.pop_front(); + stack_.pop(); goto loop; // // number // - case state::number: - BOOST_ASSERT(p < p1); - n_mant_ = 0; - n_exp_ = 0; - if(*p == '-') - { - ++p; - n_neg_ = true; - } - else - { - n_neg_ = false; - } - stack_.front() = state::number_mant1; - goto loop; - case state::number_mant1: +loop_number: if(p >= p1) break; if(! is_digit(*p)) { // expected mantissa digit - ec = error::syntax; + ec = error::expected_mantissa; goto finish; } if(*p != '0') @@ -882,7 +976,7 @@ loop: if(is_digit(*p)) { // unexpected digit after zero - ec = error::syntax; + ec = error::illegal_extra_digits; goto finish; } stack_.front() = state::number_exp; @@ -894,7 +988,7 @@ loop: if(! is_digit(*p)) { // expected mantissa fraction digit - ec = error::syntax; + ec = error::expected_fraction; goto finish; } stack_.front() = state::number_fract3; @@ -955,7 +1049,7 @@ loop: if(! is_digit(*p)) { // expected exponent digit - ec = error::syntax; + ec = error::expected_exponent; goto finish; } stack_.front() = state::number_exp_digits2; @@ -979,11 +1073,12 @@ loop: break; case state::number_end: + n_neg_ = false; this->on_number(number( n_mant_, n_exp_, n_neg_), ec); if(ec) goto finish; - stack_.pop_front(); + stack_.pop(); goto loop; // @@ -991,14 +1086,12 @@ loop: // case state::end: - /* if(p < p1) { // unexpected extra characters - ec = error::syntax; + ec = error::illegal_extra_chars; goto finish; } - */ break; } @@ -1006,7 +1099,7 @@ finish: return p - p0; } -//------------------------------------------------------------------------------ +//---------------------------------------------------------- // Called to parse the rest of the document, this // can be optimized by assuming no more data is coming. @@ -1020,10 +1113,67 @@ write( auto bytes_used = write_some(data, size, ec); if(! ec) + { write_eof(ec); + if(! ec) + { + if( bytes_used < size || + ec == error::illegal_char) + ec = error::illegal_extra_chars; + } + } return bytes_used; } +//---------------------------------------------------------- + +void +basic_parser:: +write_eof(error_code& ec) +{ + // write a null, this is invalid no matter + // what state we are in, to get a descriptive + // error. + // + // VFALCO we might want to return error::partial_data + + auto const fail = + [this, &ec] + { + char c = 0; + write_some(&c, 1, ec); + BOOST_ASSERT(ec); + }; + + while(stack_.front() != state::end) + { + // pop all states that + // allow "" (empty string) + switch(stack_.front()) + { + case state::number_mant2: + case state::number_fract1: + case state::number_fract3: + case state::number_exp: + case state::number_exp_digits2: + stack_.front() = state::number_end; + write_some(nullptr, 0, ec); + if(ec) + return; + break; + + case state::ws: + stack_.pop(); + break; + + default: + return fail(); + } + } + ec = {}; +} + + } // json } // boost diff --git a/include/boost/json/impl/error.ipp b/include/boost/json/impl/error.ipp index 8c68efb8..002cabd7 100644 --- a/include/boost/json/impl/error.ipp +++ b/include/boost/json/impl/error.ipp @@ -33,47 +33,110 @@ make_error_code(error e) { default: case error::syntax: return - "The serialized JSON object contains a syntax error"; + "syntax error"; case error::extra_data: return - "Unexpected extra data encountered while parsing"; + "extra data"; case error::mantissa_overflow: return - "A mantissa overflowed while parsing"; + "mantissa overflow"; case error::exponent_overflow: return - "An exponent overflowed while parsing"; + "exponent overflow"; case error::too_deep: return - "The parser reached the maximum allowed depth"; + "too deep"; // - case error::integer_overflow: return - "An integer assignment overflowed"; + case error::illegal_char: return + "illegal character for value"; - case error::expected_object: return - "Expected a value of kind object"; + case error::illegal_control_char: return + "illegal control character"; - case error::expected_array: return - "Expected a value of kind array"; + case error::illegal_escape_char: return + "illegal character in escape sequence"; - case error::expected_string: return - "Expected a value of kind string"; + case error::illegal_extra_digits: return + "illegal extra digits in number"; - case error::expected_number: return - "Expected a value of kind number"; + case error::illegal_extra_chars: return + "illegal extra characters"; - case error::expected_bool: return - "Expected a value of kind bool"; + case error::illegal_leading_surrogate: return + "illegal leading surrogate"; + + case error::illegal_trailing_surrogate: return + "illegal trailing surrogate"; + + // + + case error::expected_comma: return + "expected comma"; + + case error::expected_colon: return + "expected colon"; + + case error::expected_quotes: return + "expected quotes"; + + case error::expected_hex_digit: return + "expected hex digit"; + + case error::expected_utf16_escape: return + "expected utf16 escape"; + + case error::expected_mantissa: return + "expected mantissa"; + + case error::expected_fraction: return + "expected mantissa fraction"; + + case error::expected_exponent: return + "expected exponent"; + + case error::expected_true: return + "expected 'true'"; + + case error::expected_false: return + "expected 'false'"; case error::expected_null: return - "Expected a value of kind null"; + "expected 'null'"; + + // + + case error::not_object: return + "not an object"; + + case error::not_array: return + "not an array"; + + case error::not_string: return + "not a string"; + + case error::not_number: return + "not a number"; + + case error::not_bool: return + "not a boolean"; + + case error::not_null: return + "not a null"; + + case error::integer_overflow: return + "integer overflowed"; // case error::key_not_found: return - "The key was not found in the object"; + "key not found"; + + // + + case error::test_failure: return + "test failure"; } } @@ -91,15 +154,35 @@ make_error_code(error e) case error::mantissa_overflow: case error::exponent_overflow: case error::too_deep: + + case error::illegal_char: + case error::illegal_control_char: + case error::illegal_escape_char: + case error::illegal_extra_digits: + case error::illegal_extra_chars: + case error::illegal_leading_surrogate: + case error::illegal_trailing_surrogate: + + case error::expected_comma: + case error::expected_colon: + case error::expected_quotes: + case error::expected_hex_digit: + case error::expected_utf16_escape: + case error::expected_mantissa: + case error::expected_fraction: + case error::expected_exponent: + case error::expected_true: + case error::expected_false: + case error::expected_null: return condition::parse_error; + case error::not_object: + case error::not_array: + case error::not_string: + case error::not_number: + case error::not_bool: + case error::not_null: case error::integer_overflow: - case error::expected_object: - case error::expected_array: - case error::expected_string: - case error::expected_number: - case error::expected_bool: - case error::expected_null: return condition::assign_error; } } diff --git a/include/boost/json/impl/iterator.ipp b/include/boost/json/impl/iterator.ipp index fb2e46e0..03876c9a 100644 --- a/include/boost/json/impl/iterator.ipp +++ b/include/boost/json/impl/iterator.ipp @@ -161,13 +161,13 @@ operator++() noexcept { if(n.v->is_structured()) { - stack_.pop_front(); + stack_.pop(); stack_.emplace_front( *n.v, true); } else { - stack_.pop_front(); + stack_.pop(); } } else if(n.v->is_object()) @@ -175,7 +175,7 @@ operator++() noexcept if(n.obj_it == n.v->as_object().end()) { - stack_.pop_front(); + stack_.pop(); } else { @@ -192,7 +192,7 @@ operator++() noexcept if(n.arr_it == n.v->as_array().end()) { - stack_.pop_front(); + stack_.pop(); } else { diff --git a/include/boost/json/impl/parser.ipp b/include/boost/json/impl/parser.ipp index 0df26fab..7ba6e647 100644 --- a/include/boost/json/impl/parser.ipp +++ b/include/boost/json/impl/parser.ipp @@ -30,7 +30,7 @@ assign(T&& t) { BOOST_ASSERT(jv.is_null()); jv = std::forward(t); - stack_.pop_front(); + stack_.pop(); } else if(stack_.front()->is_array()) { @@ -94,7 +94,7 @@ parser:: on_document_begin(error_code&) { stack_.clear(); - stack_.push_front(&jv_); + stack_.push(&jv_); s_.clear(); obj_ = false; } @@ -119,7 +119,7 @@ on_object_begin(error_code& ec) { jv.as_array().emplace_back( kind::object); - stack_.push_front( + stack_.push( &jv.as_array().back()); } else @@ -136,7 +136,7 @@ on_object_end(error_code&) { BOOST_ASSERT( stack_.front()->is_object()); - stack_.pop_front(); + stack_.pop(); if(! stack_.empty()) { auto const& jv = stack_.front(); @@ -167,7 +167,7 @@ on_array_begin(error_code& ec) BOOST_ASSERT(s_.empty()); jv.as_array().emplace_back( kind::array); - stack_.push_front( + stack_.push( &jv.as_array().back()); } else @@ -184,7 +184,7 @@ on_array_end(error_code&) { BOOST_ASSERT( stack_.front()->is_array()); - stack_.pop_front(); + stack_.pop(); if(! stack_.empty()) { auto const& jv = stack_.front(); @@ -220,7 +220,7 @@ on_key_end( // overwrite duplicate keys if(! result.second) result.first->second.emplace_null(); - stack_.push_front(&result.first->second); + stack_.push(&result.first->second); s_.clear(); } @@ -243,7 +243,7 @@ on_string_data( { BOOST_ASSERT(s_.empty()); jv.as_array().emplace_back(kind::string); - stack_.push_front( + stack_.push( &jv.as_array().back()); stack_.front()->as_string().append( s.data(), s.size()); @@ -270,7 +270,7 @@ on_string_end( { on_string_data(s, ec); BOOST_ASSERT(stack_.front()->is_string()); - stack_.pop_front(); + stack_.pop(); if(! stack_.empty()) { auto const& jv = stack_.front(); @@ -290,7 +290,7 @@ on_number(number n, error_code&) { BOOST_ASSERT(jv.is_null()); jv.emplace_number() = std::move(n); - stack_.pop_front(); + stack_.pop(); } else if(stack_.front()->is_array()) { diff --git a/include/boost/json/impl/value.hpp b/include/boost/json/impl/value.hpp index 54be9a9c..f2438d68 100644 --- a/include/boost/json/impl/value.hpp +++ b/include/boost/json/impl/value.hpp @@ -578,22 +578,22 @@ from_json(T& t, value const& v) auto const rhs = v.get_int64(); if( rhs > (std::numeric_limits::max)() || rhs < (std::numeric_limits::min)()) - throw system_error( - error::integer_overflow); + BOOST_THROW_EXCEPTION(system_error( + error::integer_overflow)); t = static_cast(rhs); } else if(v.is_uint64()) { auto const rhs = v.get_uint64(); if(rhs > (std::numeric_limits::max)()) - throw system_error( - error::integer_overflow); + BOOST_THROW_EXCEPTION(system_error( + error::integer_overflow)); t = static_cast(rhs); } else { - throw system_error( - error::expected_number); + BOOST_THROW_EXCEPTION( + system_error(error::not_number)); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e6cc1c0a..9c3d1c95 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,6 +21,7 @@ add_executable (json-tests ${PROJECT_SOURCE_DIR}/src/src.cpp Jamfile test_storage.hpp + parse-vectors.hpp main.cpp _detail_stack.cpp array.cpp diff --git a/test/_detail_stack.cpp b/test/_detail_stack.cpp index b328efc2..1f10a4f0 100644 --- a/test/_detail_stack.cpp +++ b/test/_detail_stack.cpp @@ -33,10 +33,10 @@ public: s.emplace_front("2"); BEAST_EXPECT(s.size() == 2); BEAST_EXPECT(s[0] == "2"); - s.pop_front(); + s.pop(); BEAST_EXPECT(s.size() == 1); BEAST_EXPECT(s[0] == "1"); - s.pop_front(); + s.pop(); BEAST_EXPECT(s.empty()); } diff --git a/test/basic_parser.cpp b/test/basic_parser.cpp index cf3cb671..236b27ca 100644 --- a/test/basic_parser.cpp +++ b/test/basic_parser.cpp @@ -20,89 +20,165 @@ namespace json { class basic_parser_test : public beast::unit_test::suite { public: - struct test_parser - : basic_parser + class test_parser + : public basic_parser { - test_parser() = default; + std::size_t n_ = std::size_t(-1); + + void + maybe_fail(error_code& ec) + { + if(n_ && --n_ > 0) + return; + ec = error::test_failure; + } void on_document_begin( - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_object_begin( - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_object_end( - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_array_begin( - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_array_end( - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_key_data( string_view, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_key_end( string_view, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_string_data( string_view, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_string_end( string_view, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_number( number, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void on_bool( bool, - error_code&) override + error_code& ec) override { + maybe_fail(ec); } void - on_null(error_code&) override + on_null(error_code& ec) override + { + maybe_fail(ec); + } + + public: + test_parser() = default; + + test_parser( + std::size_t n) + : n_(n) { } }; + void + parse_grind( + string_view input, + error_code ex) + { + if(input.size() > 100) + return; + for(std::size_t n = 0; + n < input.size() - 1; ++n) + { + error_code ec; + test_parser p; + p.write_some(input.data(), n, ec); + if(! ec) + p.write_some(input.data() + n, + input.size() - n, ec); + if(! ec) + p.write_eof(ec); + if(ec) + { + BEAST_EXPECTS( + ec == ex, std::string(input) + + " : " + ec.message()); + return; + } + } + + std::size_t n = 1; + for(; n < 10000; ++n) + { + error_code ec; + test_parser p{n}; + p.write( + input.data(), + input.size(), + ec); + if(ec != error::test_failure) + { + BEAST_EXPECTS( + ec == ex, std::string(input) + + " : " + ec.message()); + break; + } + } + BEAST_EXPECT(n < 10000); + } + void good(string_view s) { @@ -286,6 +362,17 @@ public: v.text.data(), v.text.size(), ec); + if(v.result == 'i') + { + auto const s = ec ? + "reject" : "accept"; + ++info; + log << + "'" << v.result << "' " << + v.name << " " << s << "\n"; + parse_grind(v.text, ec); + continue; + } char result; result = ec ? 'n' : 'y'; if(result != v.result) @@ -302,6 +389,10 @@ public: else log << "\n"; } + else + { + parse_grind(v.text, ec); + } } if(fail > 0) log << fail << "/" << tot << @@ -315,13 +406,13 @@ public: log << "sizeof(basic_parser) == " << sizeof(basic_parser) << "\n"; + testParseVectors(); + testObject(); testArray(); testString(); testNumber(); testMonostates(); - - testParseVectors(); } }; diff --git a/test/error.cpp b/test/error.cpp index ce7b4f39..71ebeedb 100644 --- a/test/error.cpp +++ b/test/error.cpp @@ -49,16 +49,38 @@ public: check(condition::parse_error, error::mantissa_overflow); check(condition::parse_error, error::exponent_overflow); check(condition::parse_error, error::too_deep); - - check(condition::assign_error, error::integer_overflow); - check(condition::assign_error, error::expected_object); - check(condition::assign_error, error::expected_array); - check(condition::assign_error, error::expected_string); - check(condition::assign_error, error::expected_number); - check(condition::assign_error, error::expected_bool); - check(condition::assign_error, error::expected_null); + check(condition::parse_error, error::illegal_char); + check(condition::parse_error, error::illegal_control_char); + check(condition::parse_error, error::illegal_escape_char); + check(condition::parse_error, error::illegal_extra_digits); + check(condition::parse_error, error::illegal_extra_chars); + check(condition::parse_error, error::illegal_leading_surrogate); + check(condition::parse_error, error::illegal_trailing_surrogate); + + check(condition::parse_error, error::expected_comma); + check(condition::parse_error, error::expected_colon); + check(condition::parse_error, error::expected_quotes); + check(condition::parse_error, error::expected_hex_digit); + check(condition::parse_error, error::expected_utf16_escape); + check(condition::parse_error, error::expected_mantissa); + check(condition::parse_error, error::expected_fraction); + check(condition::parse_error, error::expected_exponent); + check(condition::parse_error, error::expected_true); + check(condition::parse_error, error::expected_false); + check(condition::parse_error, error::expected_null); + + check(condition::assign_error, error::not_object); + check(condition::assign_error, error::not_array); + check(condition::assign_error, error::not_string); + check(condition::assign_error, error::not_number); + check(condition::assign_error, error::not_bool); + check(condition::assign_error, error::not_null); + check(condition::assign_error, error::integer_overflow); + check(error::key_not_found); + + check(error::test_failure); } }; diff --git a/test/make-pvs.py b/test/make-pvs.py index 9b9c79f8..e4f6967a 100644 --- a/test/make-pvs.py +++ b/test/make-pvs.py @@ -2,14 +2,44 @@ import os +def chex(c): + d1 = ord(c)/16; + d2 = ord(c)%16; + d = "0123456789ABCDEF"; + s = "\\x" + d[d1:d1+1] + d[d2:d2+1]; + return s; + +def escape(c): + if c == ' ' or c == '\t': + return c; + elif c == '\"': + return "\\\""; + elif c == '\\': + return "\\\\"; + n = ord(c); + if n >= 32 and n <= 127: + return c; + return chex(c); + +def tocpp(s): + v0 = "" + v = "\""; + for c in s: + v = v + escape(c); + if len(v) > 80: + if len(v0) > 50000: + return v0 + v + "\""; + v0 += v + "\"\n \""; + v = ""; + return v0 + v + "\""; + def do_files(directory): for root, directories, files in os.walk(directory): for filename in files: filepath = os.path.join(root, filename) with open(filepath, 'r') as file: data = file.read(); - data = data.replace('\"', "\\\""); - print(" { '" + filename[0:1] + "', \"" + filename[2:-5] + "\", R\"json(" + data + ")json\" },"); + print(" { '" + filename[0:1] + "', \"" + filename[2:-5] + "\", lit(" + tocpp(data) + ") },"); print(""" // @@ -58,6 +88,14 @@ struct parse_vectors inline parse_vectors() noexcept; private: + template + static + ::boost::string_view + lit(char const (&s)[N]) + { + return {s, N - 1}; + } + iterator first_; iterator last_; }; diff --git a/test/parse-vectors.hpp b/test/parse-vectors.hpp index 2ab68e70..f070d56d 100644 Binary files a/test/parse-vectors.hpp and b/test/parse-vectors.hpp differ