From 938bdf7158ccafa82fa5216567d7400a4ed12dd2 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sun, 3 Nov 2019 09:51:17 -0800 Subject: [PATCH] Use sse2 for number parsing --- bench/bench.cpp | 149 +--------------------- include/boost/json/detail/config.hpp | 2 +- include/boost/json/detail/ieee_parser.hpp | 10 +- include/boost/json/detail/ieee_parser.ipp | 99 +++++++------- include/boost/json/detail/sse2.hpp | 91 ++++++++++++- include/boost/json/number.hpp | 3 +- 6 files changed, 145 insertions(+), 209 deletions(-) diff --git a/bench/bench.cpp b/bench/bench.cpp index 6c5d01a2..6b1b2256 100644 --- a/bench/bench.cpp +++ b/bench/bench.cpp @@ -397,7 +397,7 @@ benchSerialize( std::endl; for(unsigned j = 0; j < vi.size(); ++j) { - for(unsigned k = 0; k < 15; ++k) + for(unsigned k = 0; k < 5; ++k) { auto const when = clock_type::now(); vi[j]->serialize(vs[i].text, 1000); @@ -453,150 +453,3 @@ main( return 0; } - -/* -Parse File 1 array.json (609414 bytes) - boost(block): 397ms - boost(block): 398ms - boost(block): 393ms - boost(block): 394ms - boost(block): 396ms - rapidjson: 380ms - rapidjson: 378ms - rapidjson: 381ms - rapidjson: 378ms - rapidjson: 378ms - -Parse File 2 arrays.json (1461524 bytes) - boost(block): 1517ms - boost(block): 1517ms - boost(block): 1512ms - boost(block): 1521ms - boost(block): 1511ms - rapidjson: 937ms - rapidjson: 936ms - rapidjson: 937ms - rapidjson: 936ms - rapidjson: 938ms - -Parse File 3 strings.json (1016132 bytes) - boost(block): 49ms - boost(block): 49ms - boost(block): 49ms - boost(block): 49ms - boost(block): 49ms - rapidjson: 451ms - rapidjson: 449ms - rapidjson: 451ms - rapidjson: 450ms - rapidjson: 452ms - -Parse File 4 twitter.json (646995 bytes) - boost(block): 762ms - boost(block): 762ms - boost(block): 762ms - boost(block): 761ms - boost(block): 761ms - rapidjson: 434ms - rapidjson: 436ms - rapidjson: 453ms - rapidjson: 432ms - rapidjson: 434ms - -Parse File 5 citm_catalog.json (1777672 bytes) - boost(block): 1487ms - boost(block): 1495ms - boost(block): 1488ms - boost(block): 1492ms - boost(block): 1490ms - rapidjson: 903ms - rapidjson: 903ms - rapidjson: 903ms - rapidjson: 902ms - rapidjson: 903ms - -Parse File 6 canada.json (2251060 bytes) - boost(block): 4282ms - boost(block): 4247ms - boost(block): 4252ms - boost(block): 4234ms - boost(block): 4257ms - rapidjson: 1418ms - rapidjson: 1417ms - rapidjson: 1417ms - rapidjson: 1417ms - rapidjson: 1417ms - -Serialize File 1 array.json (609414 bytes) - boost(block): 854ms - boost(block): 853ms - boost(block): 851ms - boost(block): 853ms - boost(block): 853ms - rapidjson: 785ms - rapidjson: 785ms - rapidjson: 783ms - rapidjson: 785ms - rapidjson: 785ms - -Serialize File 2 arrays.json (1461524 bytes) - boost(block): 1269ms - boost(block): 1259ms - boost(block): 1257ms - boost(block): 1259ms - boost(block): 1260ms - rapidjson: 3112ms - rapidjson: 3110ms - rapidjson: 3109ms - rapidjson: 3109ms - rapidjson: 3114ms - -Serialize File 3 strings.json (1016132 bytes) - boost(block): 582ms - boost(block): 584ms - boost(block): 582ms - boost(block): 585ms - boost(block): 584ms - rapidjson: 2072ms - rapidjson: 2072ms - rapidjson: 2073ms - rapidjson: 2074ms - rapidjson: 2074ms - -Serialize File 4 twitter.json (646995 bytes) - boost(block): 658ms - boost(block): 659ms - boost(block): 659ms - boost(block): 659ms - boost(block): 659ms - rapidjson: 871ms - rapidjson: 871ms - rapidjson: 872ms - rapidjson: 872ms - rapidjson: 872ms - -Serialize File 5 citm_catalog.json (1777672 bytes) - boost(block): 1049ms - boost(block): 1049ms - boost(block): 1048ms - boost(block): 1047ms - boost(block): 1047ms - rapidjson: 898ms - rapidjson: 896ms - rapidjson: 898ms - rapidjson: 898ms - rapidjson: 898ms - -Serialize File 6 canada.json (2251060 bytes) - boost(block): 10521ms - boost(block): 10616ms - boost(block): 10560ms - boost(block): 10553ms - boost(block): 10446ms - rapidjson: 10788ms - rapidjson: 10776ms - rapidjson: 10784ms - rapidjson: 10805ms - rapidjson: 10789ms - -*/ diff --git a/include/boost/json/detail/config.hpp b/include/boost/json/detail/config.hpp index 9e1dd9c2..9b525892 100644 --- a/include/boost/json/detail/config.hpp +++ b/include/boost/json/detail/config.hpp @@ -48,7 +48,7 @@ # ifdef _MSC_VER # define BOOST_JSON_FORCEINLINE __forceinline # else -# define BOOST_JSON_FORCEINLINE +# define BOOST_JSON_FORCEINLINE inline # endif #endif diff --git a/include/boost/json/detail/ieee_parser.hpp b/include/boost/json/detail/ieee_parser.hpp index daf613e4..c78f43a0 100644 --- a/include/boost/json/detail/ieee_parser.hpp +++ b/include/boost/json/detail/ieee_parser.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace boost { namespace json { @@ -63,17 +63,17 @@ public: maybe_init(char ch) noexcept; BOOST_JSON_DECL - std::size_t + size_t write_some( char const* data, - std::size_t size, + size_t size, error_code& ec) noexcept; BOOST_JSON_DECL - std::size_t + size_t write( char const* data, - std::size_t size, + size_t size, error_code& ec) noexcept; BOOST_JSON_DECL diff --git a/include/boost/json/detail/ieee_parser.ipp b/include/boost/json/detail/ieee_parser.ipp index fc8d9409..2b4f762e 100644 --- a/include/boost/json/detail/ieee_parser.ipp +++ b/include/boost/json/detail/ieee_parser.ipp @@ -12,6 +12,7 @@ #include #include +#include namespace boost { namespace json { @@ -44,11 +45,11 @@ maybe_init(char ch) noexcept return true; } -std::size_t +size_t ieee_parser:: write_some( char const* const data, - std::size_t const size, + size_t const size, error_code& ec) noexcept { auto p = data; @@ -80,48 +81,26 @@ loop: ec = error::expected_mantissa; break; } - ++p; if(d == 0) { + ++p; st_ = state::frac1; goto loop; } // 64-bit unsigned is at most 20 digits - if(p1 - p < 18) + if(p1 - p < 16) { + ++p; dec_.mantissa = d; st_ = state::mant2; goto loop; } - // fast loop - unsigned long long m = d; - for(int i = 0; i < 18; ++i) - { - d = *p - '0'; - if(d < 10) - { - m = 10 * m + d; - ++p; - continue; - } - if(*p == '.') - { - ++p; - dec_.mantissa = m; - st_ = state::frac2; - goto loop; - } - if(*p == 'e' || *p == 'E') - { - ++p; - dec_.mantissa = m; - st_ = state::exp1; - goto loop; - } - st_ = state::done; - goto finish; - } - dec_.mantissa = m; + // fast path + auto const result = + detail::parse_unsigned( + dec_.mantissa, p); + dec_.mantissa = result.m; + p += result.n; st_ = state::mant2; BOOST_FALLTHROUGH; } @@ -190,23 +169,49 @@ loop: { if(p >= p1) break; - unsigned char const d = *p - '0'; - if(d >= 10) + // three digits or less + if( dec_.mantissa < 1000 && + p1 - p >= 16) { - ec = error::expected_fraction; - break; - } - auto tmp = dec_.mantissa * 10 + d; - if(dec_.mantissa < tmp) - { - --off_; - dec_.mantissa = tmp; + // fast path + auto const result = + parse_unsigned(dec_.mantissa, p); + if(result.n == 0) + { + ec = error::expected_fraction; + break; + } + p += result.n; + off_ -= static_cast(result.n); + if(result.n < 16) + { + dec_.mantissa = result.m; + dec_.exponent = off_; + st_ = state::done; + goto finish; + } + dec_.mantissa = result.m; } else { - // limit of precision + unsigned char const d = *p - '0'; + if(d >= 10) + { + ec = error::expected_fraction; + break; + } + auto tmp = dec_.mantissa * 10 + d; + if(dec_.mantissa < tmp) + { + --off_; + dec_.mantissa = tmp; + } + else + { + // limit of precision + } + ++p; } - ++p; st_ = state::frac3; BOOST_FALLTHROUGH; } @@ -363,11 +368,11 @@ finish: return p - p0; } -std::size_t +size_t ieee_parser:: write( char const* data, - std::size_t size, + size_t size, error_code& ec) noexcept { auto n = write_some(data, size, ec); diff --git a/include/boost/json/detail/sse2.hpp b/include/boost/json/detail/sse2.hpp index 97b14333..6d3a1a79 100644 --- a/include/boost/json/detail/sse2.hpp +++ b/include/boost/json/detail/sse2.hpp @@ -1,5 +1,6 @@ // -// Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com) +// Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), +// Vinnie Falco (vinnie dot falco at gmail dot com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -11,21 +12,41 @@ #define BOOST_JSON_DETAIL_SSE2_HPP #include +#include #ifdef BOOST_JSON_USE_SSE2 -#include -#include +# include +# include +# ifdef _MSC_VER +# include +# endif #endif namespace boost { namespace json { namespace detail { -#ifdef BOOST_JSON_USE_SSE2 +struct parse_unsigned_result +{ + uint64_t m; + int n; +}; + inline -unsigned long long +bool +operator==( + parse_unsigned_result const& lhs, + parse_unsigned_result const& rhs) noexcept +{ + return lhs.m == rhs.m && lhs.n == rhs.n; +} + +#ifdef BOOST_JSON_USE_SSE2 + +inline +size_t count_unescaped( char const* s, - unsigned long long n) noexcept + size_t n) noexcept { __m128i const q1 = _mm_set1_epi8( '"' ); __m128i const q2 = _mm_set1_epi8( '\\' ); @@ -71,10 +92,51 @@ count_unescaped( return s - s0; }; +// assumes p..p+15 are valid +inline +parse_unsigned_result +parse_unsigned( uint64_t r, char const* p ) noexcept +{ + __m128i const q1 = _mm_set1_epi8( '0' ); + __m128i const q2 = _mm_set1_epi8( '9' ); + + __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); + + v1 = _mm_or_si128( + _mm_cmplt_epi8( v1, q1 ), + _mm_cmpgt_epi8( v1, q2 ) ); + + int m = _mm_movemask_epi8( v1 ); + + int n; + + if( m == 0 ) + { + n = 16; + } + else + { +#if defined(__GNUC__) || defined(__clang__) + n = __builtin_ffs( m ) - 1; +#else + unsigned long index; + _BitScanForward( &index, m ); + n = index; +#endif + } + + for( int i = 0; i < n; ++i ) + { + r = r * 10 + p[ i ] - '0'; + } + + return { r, n }; +} + #else inline -unsigned long long +size_t count_unescaped( char const*, unsigned long long) noexcept @@ -82,6 +144,21 @@ count_unescaped( return 0; } +inline +parse_unsigned_result +parse_unsigned( uint64_t r, char const* p ) noexcept +{ + int n = 0; + for(; n< 16; ++n ) + { + unsigned char const d = *p++ - '0'; + if(d > 9) + break; + r = r * 10 + d; + } + return { r, n }; +} + #endif } // detail diff --git a/include/boost/json/number.hpp b/include/boost/json/number.hpp index 949dd13e..02a3a432 100644 --- a/include/boost/json/number.hpp +++ b/include/boost/json/number.hpp @@ -11,6 +11,7 @@ #define BOOST_JSON_NUMBER_HPP #include +#include namespace boost { namespace json { @@ -19,7 +20,7 @@ namespace json { */ struct ieee_decimal { - unsigned long long mantissa; + uint64_t mantissa; short exponent; bool sign; };