Move utilities to common

2026-02-21 15:02:20 +00:00 · 2023-04-11 15:11:53 +02:00
parent d63f1efde2
commit 00e2076bf6
2 changed files with 64 additions and 241 deletions
--- a/include/boost/charconv/detail/dragonbox.hpp
+++ b/include/boost/charconv/detail/dragonbox.hpp
@@ -24,6 +24,9 @@
 #include <cstring>
 #include <limits>
 #include <type_traits>
+#include <boost/core/bit.hpp>
+#include <boost/charconv/detail/emulated128.hpp>
+#include <boost/charconv/detail/dragonbox_common.hpp>

 // Suppress additional buffer overrun check.
 // I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun
@@ -313,200 +316,6 @@ namespace jkj::dragonbox {
    };

    namespace detail {
-        ////////////////////////////////////////////////////////////////////////////////////////
-        // Bit operation intrinsics.
-        ////////////////////////////////////////////////////////////////////////////////////////
-
-        namespace bits {
-            // Most compilers should be able to optimize this into the ROR instruction.
-            inline std::uint32_t rotr(std::uint32_t n, std::uint32_t r) noexcept {
-                r &= 31;
-                return (n >> r) | (n << (32 - r));
-            }
-            inline std::uint64_t rotr(std::uint64_t n, std::uint32_t r) noexcept {
-                r &= 63;
-                return (n >> r) | (n << (64 - r));
-            }
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////
-        // Utilities for wide unsigned integer arithmetic.
-        ////////////////////////////////////////////////////////////////////////////////////////
-
-        namespace wuint {
-            // Compilers might support built-in 128-bit integer types. However, it seems that
-            // emulating them with a pair of 64-bit integers actually produces a better code,
-            // so we avoid using those built-ins. That said, they are still useful for
-            // implementing 64-bit x 64-bit -> 128-bit multiplication.
-
-            // clang-format off
-#if defined(__SIZEOF_INT128__)
-		// To silence "error: ISO C++ does not support '__int128' for 'type name'
-		// [-Wpedantic]"
-#if defined(__GNUC__)
-			__extension__
-#endif
-				using builtin_uint128_t = unsigned __int128;
-#endif
-            // clang-format on
-
-            struct uint128 {
-                uint128() = default;
-
-                std::uint64_t high_;
-                std::uint64_t low_;
-
-                constexpr uint128(std::uint64_t high, std::uint64_t low) noexcept
-                    : high_{high}, low_{low} {}
-
-                constexpr std::uint64_t high() const noexcept { return high_; }
-                constexpr std::uint64_t low() const noexcept { return low_; }
-
-                uint128& operator+=(std::uint64_t n) & noexcept {
-#if JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_addcll)
-                    unsigned long long carry;
-                    low_ = __builtin_addcll(low_, n, 0, &carry);
-                    high_ = __builtin_addcll(high_, 0, carry, &carry);
-#elif JKJ_DRAGONBOX_HAS_BUILTIN(__builtin_ia32_addcarryx_u64)
-                    unsigned long long result;
-                    auto carry = __builtin_ia32_addcarryx_u64(0, low_, n, &result);
-                    low_ = result;
-                    __builtin_ia32_addcarryx_u64(carry, high_, 0, &result);
-                    high_ = result;
-#elif defined(_MSC_VER) && defined(_M_X64)
-                    auto carry = _addcarry_u64(0, low_, n, &low_);
-                    _addcarry_u64(carry, high_, 0, &high_);
-#else
-                    auto sum = low_ + n;
-                    high_ += (sum < low_ ? 1 : 0);
-                    low_ = sum;
-#endif
-                    return *this;
-                }
-            };
-
-            static inline std::uint64_t umul64(std::uint32_t x, std::uint32_t y) noexcept {
-#if defined(_MSC_VER) && defined(_M_IX86)
-                return __emulu(x, y);
-#else
-                return x * std::uint64_t(y);
-#endif
-            }
-
-            // Get 128-bit result of multiplication of two 64-bit unsigned integers.
-            JKJ_SAFEBUFFERS inline uint128 umul128(std::uint64_t x, std::uint64_t y) noexcept {
-#if defined(__SIZEOF_INT128__)
-                auto result = builtin_uint128_t(x) * builtin_uint128_t(y);
-                return {std::uint64_t(result >> 64), std::uint64_t(result)};
-#elif defined(_MSC_VER) && defined(_M_X64)
-                uint128 result;
-                result.low_ = _umul128(x, y, &result.high_);
-                return result;
-#else
-                auto a = std::uint32_t(x >> 32);
-                auto b = std::uint32_t(x);
-                auto c = std::uint32_t(y >> 32);
-                auto d = std::uint32_t(y);
-
-                auto ac = umul64(a, c);
-                auto bc = umul64(b, c);
-                auto ad = umul64(a, d);
-                auto bd = umul64(b, d);
-
-                auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc);
-
-                return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32),
-                        (intermediate << 32) + std::uint32_t(bd)};
-#endif
-            }
-
-            JKJ_SAFEBUFFERS inline std::uint64_t umul128_upper64(std::uint64_t x,
-                                                                 std::uint64_t y) noexcept {
-#if defined(__SIZEOF_INT128__)
-                auto result = builtin_uint128_t(x) * builtin_uint128_t(y);
-                return std::uint64_t(result >> 64);
-#elif defined(_MSC_VER) && defined(_M_X64)
-                return __umulh(x, y);
-#else
-                auto a = std::uint32_t(x >> 32);
-                auto b = std::uint32_t(x);
-                auto c = std::uint32_t(y >> 32);
-                auto d = std::uint32_t(y);
-
-                auto ac = umul64(a, c);
-                auto bc = umul64(b, c);
-                auto ad = umul64(a, d);
-                auto bd = umul64(b, d);
-
-                auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc);
-
-                return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32);
-#endif
-            }
-
-            // Get upper 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit
-            // unsigned integer.
-            JKJ_SAFEBUFFERS inline uint128 umul192_upper128(std::uint64_t x, uint128 y) noexcept {
-                auto r = umul128(x, y.high());
-                r += umul128_upper64(x, y.low());
-                return r;
-            }
-
-            // Get upper 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit
-            // unsigned integer.
-            inline std::uint64_t umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept {
-#if defined(__SIZEOF_INT128__) || (defined(_MSC_VER) && defined(_M_X64))
-                return umul128_upper64(std::uint64_t(x) << 32, y);
-#else
-                auto yh = std::uint32_t(y >> 32);
-                auto yl = std::uint32_t(y);
-
-                auto xyh = umul64(x, yh);
-                auto xyl = umul64(x, yl);
-
-                return xyh + (xyl >> 32);
-#endif
-            }
-
-            // Get lower 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit
-            // unsigned integer.
-            JKJ_SAFEBUFFERS inline uint128 umul192_lower128(std::uint64_t x, uint128 y) noexcept {
-                auto high = x * y.high();
-                auto high_low = umul128(x, y.low());
-                return {high + high_low.high(), high_low.low()};
-            }
-
-            // Get lower 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit
-            // unsigned integer.
-            inline std::uint64_t umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept {
-                return x * y;
-            }
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////
-        // Some simple utilities for constexpr computation.
-        ////////////////////////////////////////////////////////////////////////////////////////
-
-        template <int k, class Int>
-        constexpr Int compute_power(Int a) noexcept {
-            static_assert(k >= 0);
-            Int p = 1;
-            for (int i = 0; i < k; ++i) {
-                p *= a;
-            }
-            return p;
-        }
-
-        template <int a, class UInt>
-        constexpr int count_factors(UInt n) noexcept {
-            static_assert(a > 1);
-            int c = 0;
-            while (n % a == 0) {
-                n /= a;
-                ++c;
-            }
-            return c;
-        }

        ////////////////////////////////////////////////////////////////////////////////////////
        // Utilities for fast/constexpr log computation.
@@ -614,7 +423,7 @@ namespace jkj::dragonbox {
            constexpr bool check_divisibility_and_divide_by_pow10(std::uint32_t& n) noexcept {
                // Make sure the computation for max_n does not overflow.
                static_assert(N + 1 <= log::floor_log10_pow2(31));
-                assert(n <= compute_power<N + 1>(std::uint32_t(10)));
+                assert(n <= boost::charconv::detail::compute_power(std::uint32_t(10), N + 1));

                using info = divide_by_pow10_info<N>;
                n *= info::magic_number;
@@ -632,7 +441,7 @@ namespace jkj::dragonbox {
            constexpr std::uint32_t small_division_by_pow10(std::uint32_t n) noexcept {
                // Make sure the computation for max_n does not overflow.
                static_assert(N + 1 <= log::floor_log10_pow2(31));
-                assert(n <= compute_power<N + 1>(std::uint32_t(10)));
+                assert(n <= boost::charconv::detail::compute_power(std::uint32_t(10), N + 1));

                return (n * divide_by_pow10_info<N>::magic_number) >>
                       divide_by_pow10_info<N>::shift_amount;
@@ -650,16 +459,16 @@ namespace jkj::dragonbox {
                // (mul + mov for no apparent reason, instead of single imul),
                // so we does this manually.
                if constexpr (std::is_same_v<UInt, std::uint32_t> && N == 2) {
-                    return std::uint32_t(wuint::umul64(n, std::uint32_t(1374389535)) >> 37);
+                    return std::uint32_t(boost::charconv::detail::umul64(n, std::uint32_t(1374389535)) >> 37);
                }
                // Specialize for 64-bit division by 1000.
                // Ensure that the correctness condition is met.
                else if constexpr (std::is_same_v<UInt, std::uint64_t> && N == 3 &&
                              n_max <= std::uint64_t(15534100272597517998ull)) {
-                    return wuint::umul128_upper64(n, std::uint64_t(2361183241434822607ull)) >> 7;
+                    return boost::charconv::detail::umul128_upper64(n, std::uint64_t(2361183241434822607ull)) >> 7;
                }
                else {
-                    constexpr auto divisor = compute_power<N>(UInt(10));
+                    constexpr auto divisor = boost::charconv::detail::compute_power(UInt(10), N);
                    return n / divisor;
                }
            }
@@ -755,7 +564,7 @@ namespace jkj::dragonbox {

        template <>
        struct cache_holder<ieee754_binary64> {
-            using cache_entry_type = wuint::uint128;
+            using cache_entry_type = boost::charconv::detail::uint128;
            static constexpr int cache_bits = 128;
            static constexpr int min_k = -292;
            static constexpr int max_k = 326;
@@ -1081,7 +890,7 @@ namespace jkj::dragonbox {
                compression_ratio;

            struct cache_holder_t {
-                wuint::uint128 table[compressed_table_size];
+                boost::charconv::detail::uint128 table[compressed_table_size];
            };
            static constexpr cache_holder_t cache = [] {
                cache_holder_t res{};
@@ -1612,21 +1421,21 @@ namespace jkj::dragonbox {

                                // Try to recover the real cache.
                                auto const pow5 = compressed_cache_detail::pow5.table[offset];
-                                auto recovered_cache = wuint::umul128(base_cache.high(), pow5);
-                                auto const middle_low = wuint::umul128(base_cache.low(), pow5);
+                                auto recovered_cache = boost::charconv::detail::umul128(base_cache.high, pow5);
+                                auto const middle_low = boost::charconv::detail::umul128(base_cache.low, pow5);

-                                recovered_cache += middle_low.high();
+                                recovered_cache += middle_low.high;

-                                auto const high_to_middle = recovered_cache.high() << (64 - alpha);
-                                auto const middle_to_low = recovered_cache.low() << (64 - alpha);
+                                auto const high_to_middle = recovered_cache.high << (64 - alpha);
+                                auto const middle_to_low = recovered_cache.low << (64 - alpha);

-                                recovered_cache = wuint::uint128{
-                                    (recovered_cache.low() >> alpha) | high_to_middle,
-                                    ((middle_low.low() >> alpha) | middle_to_low)};
+                                recovered_cache = boost::charconv::detail::uint128{
+                                    (recovered_cache.low >> alpha) | high_to_middle,
+                                    ((middle_low.low >> alpha) | middle_to_low)};

-                                assert(recovered_cache.low() + 1 != 0);
-                                recovered_cache = {recovered_cache.high(),
-                                                   recovered_cache.low() + 1};
+                                assert(recovered_cache.low + 1 != 0);
+                                recovered_cache = {recovered_cache.high,
+                                                   recovered_cache.low + 1};

                                return recovered_cache;
                            }
@@ -1756,16 +1565,15 @@ namespace jkj::dragonbox {
            static constexpr int case_shorter_interval_left_endpoint_upper_threshold =
                2 +
                log::floor_log2(
-                    compute_power<
-                        count_factors<5>((carrier_uint(1) << (significand_bits + 2)) - 1) + 1>(10) /
+                    boost::charconv::detail::compute_power
+                        (10, boost::charconv::detail::count_factors<5>((carrier_uint(1) << (significand_bits + 2)) - 1) + 1) /
                    3);

            static constexpr int case_shorter_interval_right_endpoint_lower_threshold = 0;
            static constexpr int case_shorter_interval_right_endpoint_upper_threshold =
                2 +
                log::floor_log2(
-                    compute_power<
-                        count_factors<5>((carrier_uint(1) << (significand_bits + 1)) + 1) + 1>(10) /
+                    boost::charconv::detail::compute_power(10, boost::charconv::detail::count_factors<5>((carrier_uint(1) << (significand_bits + 1)) + 1) + 1) /
                    3);

            static constexpr int shorter_interval_tie_lower_threshold =
@@ -1821,8 +1629,8 @@ namespace jkj::dragonbox {
                // Step 2: Try larger divisor; remove trailing zeros if necessary
                //////////////////////////////////////////////////////////////////////

-                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
-                constexpr auto small_divisor = compute_power<kappa>(std::uint32_t(10));
+                constexpr auto big_divisor = boost::charconv::detail::compute_power(std::uint32_t(10), kappa + 1);
+                constexpr auto small_divisor = boost::charconv::detail::compute_power(std::uint32_t(10), kappa);

                // Using an upper bound on zi, we might be able to optimize the division
                // better than the compiler; we are computing zi / big_divisor here.
@@ -2034,7 +1842,7 @@ namespace jkj::dragonbox {
                // Step 2: Try larger divisor; remove trailing zeros if necessary
                //////////////////////////////////////////////////////////////////////

-                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
+                constexpr auto big_divisor = boost::charconv::detail::compute_power(std::uint32_t(10), kappa + 1);

                // Using an upper bound on xi, we might be able to optimize the division
                // better than the compiler; we are computing xi / big_divisor here.
@@ -2111,7 +1919,7 @@ namespace jkj::dragonbox {
                // Step 2: Try larger divisor; remove trailing zeros if necessary
                //////////////////////////////////////////////////////////////////////

-                constexpr auto big_divisor = compute_power<kappa + 1>(std::uint32_t(10));
+                constexpr auto big_divisor = boost::charconv::detail::compute_power(std::uint32_t(10), kappa + 1);

                // Using an upper bound on zi, we might be able to optimize the division better than
                // the compiler; we are computing zi / big_divisor here.
@@ -2160,7 +1968,7 @@ namespace jkj::dragonbox {

                    int s = 0;
                    while (true) {
-                        auto q = bits::rotr(n * mod_inv_25, 2);
+                        auto q = boost::core::rotr(n * mod_inv_25, 2);
                        if (q <= std::numeric_limits<std::uint32_t>::max() / 100) {
                            n = q;
                            s += 2;
@@ -2169,7 +1977,7 @@ namespace jkj::dragonbox {
                            break;
                        }
                    }
-                    auto q = bits::rotr(n * mod_inv_5, 1);
+                    auto q = boost::core::rotr(n * mod_inv_5, 1);
                    if (q <= std::numeric_limits<std::uint32_t>::max() / 10) {
                        n = q;
                        s |= 1;
@@ -2186,20 +1994,20 @@ namespace jkj::dragonbox {

                    // This magic number is ceil(2^90 / 10^8).
                    constexpr auto magic_number = std::uint64_t(12379400392853802749ull);
-                    auto nm = wuint::umul128(n, magic_number);
+                    auto nm = boost::charconv::detail::umul128(n, magic_number);

                    // Is n is divisible by 10^8?
-                    if ((nm.high() & ((std::uint64_t(1) << (90 - 64)) - 1)) == 0 &&
-                        nm.low() < magic_number) {
+                    if ((nm.high & ((std::uint64_t(1) << (90 - 64)) - 1)) == 0 &&
+                        nm.low < magic_number) {
                        // If yes, work with the quotient.
-                        auto n32 = std::uint32_t(nm.high() >> (90 - 64));
+                        auto n32 = std::uint32_t(nm.high >> (90 - 64));

                        constexpr auto mod_inv_5 = std::uint32_t(0xcccc'cccd);
                        constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5;

                        int s = 8;
                        while (true) {
-                            auto q = bits::rotr(n32 * mod_inv_25, 2);
+                            auto q = boost::core::rotr(n32 * mod_inv_25, 2);
                            if (q <= std::numeric_limits<std::uint32_t>::max() / 100) {
                                n32 = q;
                                s += 2;
@@ -2208,7 +2016,7 @@ namespace jkj::dragonbox {
                                break;
                            }
                        }
-                        auto q = bits::rotr(n32 * mod_inv_5, 1);
+                        auto q = boost::core::rotr(n32 * mod_inv_5, 1);
                        if (q <= std::numeric_limits<std::uint32_t>::max() / 10) {
                            n32 = q;
                            s |= 1;
@@ -2224,7 +2032,7 @@ namespace jkj::dragonbox {

                    int s = 0;
                    while (true) {
-                        auto q = bits::rotr(n * mod_inv_25, 2);
+                        auto q = boost::core::rotr(n * mod_inv_25, 2);
                        if (q <= std::numeric_limits<std::uint64_t>::max() / 100) {
                            n = q;
                            s += 2;
@@ -2233,7 +2041,7 @@ namespace jkj::dragonbox {
                            break;
                        }
                    }
-                    auto q = bits::rotr(n * mod_inv_5, 1);
+                    auto q = boost::core::rotr(n * mod_inv_5, 1);
                    if (q <= std::numeric_limits<std::uint64_t>::max() / 10) {
                        n = q;
                        s |= 1;
@@ -2246,13 +2054,13 @@ namespace jkj::dragonbox {
            static compute_mul_result compute_mul(carrier_uint u,
                                                  cache_entry_type const& cache) noexcept {
                if constexpr (std::is_same_v<format, ieee754_binary32>) {
-                    auto r = wuint::umul96_upper64(u, cache);
+                    auto r = boost::charconv::detail::umul96_upper64(u, cache);
                    return {carrier_uint(r >> 32), carrier_uint(r) == 0};
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    auto r = wuint::umul192_upper128(u, cache);
-                    return {r.high(), r.low() == 0};
+                    auto r = boost::charconv::detail::umul192_upper128(u, cache);
+                    return {r.high, r.low == 0};
                }
            }

@@ -2263,7 +2071,7 @@ namespace jkj::dragonbox {
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    return std::uint32_t(cache.high() >> (carrier_bits - 1 - beta));
+                    return std::uint32_t(cache.high >> (carrier_bits - 1 - beta));
                }
            }

@@ -2274,14 +2082,14 @@ namespace jkj::dragonbox {
                assert(beta < 64);

                if constexpr (std::is_same_v<format, ieee754_binary32>) {
-                    auto r = wuint::umul96_lower64(two_f, cache);
+                    auto r = boost::charconv::detail::umul96_lower64(two_f, cache);
                    return {((r >> (64 - beta)) & 1) != 0, std::uint32_t(r >> (32 - beta)) == 0};
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    auto r = wuint::umul192_lower128(two_f, cache);
-                    return {((r.high() >> (64 - beta)) & 1) != 0,
-                            ((r.high() << beta) | (r.low() >> (64 - beta))) == 0};
+                    auto r = boost::charconv::detail::umul192_lower128(two_f, cache);
+                    return {((r.high >> (64 - beta)) & 1) != 0,
+                            ((r.high << beta) | (r.low >> (64 - beta))) == 0};
                }
            }

@@ -2294,7 +2102,7 @@ namespace jkj::dragonbox {
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    return (cache.high() - (cache.high() >> (significand_bits + 2))) >>
+                    return (cache.high - (cache.high >> (significand_bits + 2))) >>
                           (carrier_bits - significand_bits - 1 - beta);
                }
            }
@@ -2308,7 +2116,7 @@ namespace jkj::dragonbox {
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    return (cache.high() + (cache.high() >> (significand_bits + 1))) >>
+                    return (cache.high + (cache.high >> (significand_bits + 1))) >>
                           (carrier_bits - significand_bits - 1 - beta);
                }
            }
@@ -2322,7 +2130,7 @@ namespace jkj::dragonbox {
                }
                else {
                    static_assert(std::is_same_v<format, ieee754_binary64>);
-                    return ((cache.high() >> (carrier_bits - significand_bits - 2 - beta)) + 1) / 2;
+                    return ((cache.high >> (carrier_bits - significand_bits - 2 - beta)) + 1) / 2;
                }
            }

--- a/include/boost/charconv/detail/dragonbox_common.hpp
+++ b/include/boost/charconv/detail/dragonbox_common.hpp
@@ -330,6 +330,21 @@ static constexpr std::uint64_t power_of_10[] = {

 static_assert(sizeof(power_of_10) == 20 * sizeof(std::uint64_t), "There should be the first 20 powers of 10");

+
+template <int a, typename UInt>
+BOOST_CHARCONV_CXX14_CONSTEXPR int count_factors(UInt n) noexcept
+{
+    static_assert(a > 1);
+    int c = 0;
+
+    while (n % a == 0)
+    {
+        n /= a;
+        ++c;
+    }
+    return c;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////
 // Utilities for fast/constexpr log computation.
 ////////////////////////////////////////////////////////////////////////////////////////