Use BOOST_FORCEINLINE

This commit is contained in:
Matt Borland
2023-05-30 16:07:20 +02:00
parent 98c9f43eda
commit bca4cc5d89
6 changed files with 52 additions and 58 deletions

View File

@@ -19,11 +19,11 @@ namespace boost { namespace charconv { namespace detail { namespace fast_float {
// Next function can be micro-optimized, but compilers are entirely
// able to optimize it well.
// Digit test: returns true exactly when c is neither greater than UC('9')
// nor less than UC('0'), i.e. c falls inside the range '0'..'9' once the
// bounds are converted to the character type UC.
template <typename UC>
fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
BOOST_FORCEINLINE constexpr bool is_integer(UC c) noexcept {
return !(c > UC('9') || c < UC('0'));
}
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
BOOST_FORCEINLINE constexpr uint64_t byteswap(uint64_t val) {
return (val & 0xFF00000000000000) >> 56
| (val & 0x00FF000000000000) >> 40
| (val & 0x0000FF0000000000) >> 24
@@ -34,7 +34,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
| (val & 0x00000000000000FF) << 56;
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t read_u64(const char *chars) {
if (cpp20_and_in_constexpr()) {
uint64_t val = 0;
@@ -53,7 +53,7 @@ uint64_t read_u64(const char *chars) {
return val;
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void write_u64(uint8_t *chars, uint64_t val) {
if (cpp20_and_in_constexpr()) {
for(int i = 0; i < 8; ++i) {
@@ -71,7 +71,7 @@ void write_u64(uint8_t *chars, uint64_t val) {
}
// credit @aqrit
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
uint32_t parse_eight_digits_unrolled(uint64_t val) {
const uint64_t mask = 0x000000FF000000FF;
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
@@ -82,38 +82,38 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
return uint32_t(val);
}
// char16_t overload: the pointer argument is unnamed and ignored; the
// function always returns 0.
// NOTE(review): presumably a placeholder so generic callers compile for
// char16_t input — confirm against the call sites.
fastfloat_really_inline constexpr
BOOST_FORCEINLINE constexpr
uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
return 0;
}
// char32_t overload: the pointer argument is unnamed and ignored; the
// function always returns 0.
// NOTE(review): presumably a placeholder so generic callers compile for
// char32_t input — confirm against the call sites.
fastfloat_really_inline constexpr
BOOST_FORCEINLINE constexpr
uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
return 0;
}
// char overload: loads a 64-bit value from chars through read_u64 and passes
// it to the uint64_t overload of parse_eight_digits_unrolled, returning that
// overload's result.
// NOTE(review): assumes at least 8 readable bytes at chars — confirm in
// read_u64 and at the call sites.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
return parse_eight_digits_unrolled(read_u64(chars));
}
// credit @aqrit
// Checks all eight bytes of val in one expression. Viewing each byte in
// isolation: adding 0x46 sets the byte's high bit for any value above 0x39
// ('9'), and subtracting 0x30 wraps into the high bit for any value below
// 0x30 ('0'). The two results are OR-ed and masked with 0x80 per byte; the
// function returns true only when no masked bit is set.
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
BOOST_FORCEINLINE constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
0x8080808080808080));
}
// char16_t overload: the pointer argument is unnamed and ignored; the
// function always returns false.
// NOTE(review): presumably this keeps the eight-digit fast path disabled for
// char16_t input — confirm against the call sites.
fastfloat_really_inline constexpr
BOOST_FORCEINLINE constexpr
bool is_made_of_eight_digits_fast(const char16_t *) noexcept {
return false;
}
// char32_t overload: the pointer argument is unnamed and ignored; the
// function always returns false.
// NOTE(review): presumably this keeps the eight-digit fast path disabled for
// char32_t input — confirm against the call sites.
fastfloat_really_inline constexpr
BOOST_FORCEINLINE constexpr
bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
return false;
}
// char overload: loads a 64-bit value from chars through read_u64 and returns
// the verdict of the uint64_t overload on that value.
// NOTE(review): assumes at least 8 readable bytes at chars — confirm in
// read_u64 and at the call sites.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
bool is_made_of_eight_digits_fast(const char *chars) noexcept {
return is_made_of_eight_digits_fast(read_u64(chars));
}
@@ -135,7 +135,7 @@ using parsed_number_string = parsed_number_string_t<char>;
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept {
chars_format const fmt = options.format;
UC const decimal_point = options.decimal_point;

View File

@@ -161,20 +161,20 @@ struct stackvec {
}
};
// Zero-input case of the *_hi64 helpers: writes false to the truncated
// out-parameter and returns 0.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
uint64_t empty_hi64(bool& truncated) noexcept {
truncated = false;
return 0;
}
// Single-value variant: writes false to truncated, then returns r0 shifted
// left by the count reported by leading_zeroes(r0).
// NOTE(review): leading_zeroes asserts that its input is non-zero, so
// r0 == 0 looks unsupported here — confirm with the callers.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept {
truncated = false;
int shl = leading_zeroes(r0);
return r0 << shl;
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
int shl = leading_zeroes(r0);
if (shl == 0) {
@@ -187,19 +187,19 @@ uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
}
}
// Single 32-bit value variant: r0 is widened to uint64_t by the call and
// forwarded, together with truncated, to the single-value uint64_hi64
// overload; that overload's result is returned unchanged.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept {
return uint64_hi64(r0, truncated);
}
// Two 32-bit values variant: packs r0 into the upper 32 bits and r1 into the
// lower 32 bits of one 64-bit value, then forwards that value to the
// single-value uint64_hi64 overload.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept {
// widen first so the 32-bit shift below happens in 64-bit arithmetic
uint64_t x0 = r0;
uint64_t x1 = r1;
return uint64_hi64((x0 << 32) | x1, truncated);
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept {
uint64_t x0 = r0;
uint64_t x1 = r1;
@@ -211,7 +211,7 @@ uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noe
// we want an efficient operation. for msvc, where
// we don't have built-in intrinsics, this is still
// pretty fast.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
limb scalar_add(limb x, limb y, bool& overflow) noexcept {
limb z;
// gcc and clang
@@ -231,7 +231,7 @@ limb scalar_add(limb x, limb y, bool& overflow) noexcept {
}
// multiply two small integers, getting both the high and low bits.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
limb scalar_mul(limb x, limb y, limb& carry) noexcept {
#ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB
#if defined(__SIZEOF_INT128__)
@@ -277,7 +277,7 @@ bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
// add scalar value to bigint.
// Convenience wrapper: calls small_add_from(vec, y, 0), i.e. with a start
// index of 0, and returns its bool result unchanged.
template <uint16_t size>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
bool small_add(stackvec<size>& vec, limb y) noexcept {
return small_add_from(vec, y, 0);
}
@@ -330,7 +330,7 @@ bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
// add bigint to bigint.
// Two-argument overload: calls the three-argument large_add_from with a
// start index of 0 and returns its bool result unchanged.
template <uint16_t size>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
return large_add_from(x, y, 0);
}

View File

@@ -24,7 +24,7 @@ namespace boost { namespace charconv { namespace detail { namespace fast_float {
// low part corresponding to the least significant bits.
//
template <int bit_precision>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
value128 compute_product_approximation(int64_t q, uint64_t w) {
const int index = 2 * int(q - powers::smallest_power_of_five);
// For small values of q, e.g., q in [0,27], the answer is always exact because
@@ -62,7 +62,7 @@ namespace detail {
* where
* p = log(5**-q)/log(2) = -q * log(5)/log(2)
*/
// Evaluates ((217706 * q) >> 16) + 63 in int32_t arithmetic.
// 152170 / 65536 is about 2.3219, and the added 65536 contributes 1.0 at the
// same 16-bit fixed-point scale, so the multiplier is about 3.3219.
// NOTE(review): for negative q the result relies on >> of a negative int
// behaving as an arithmetic shift — confirm for the supported compilers.
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
constexpr BOOST_FORCEINLINE int32_t power(int32_t q) noexcept {
return (((152170 + 65536) * q) >> 16) + 63;
}
} // namespace detail
@@ -70,7 +70,7 @@ namespace detail {
// create an adjusted mantissa, biased by the invalid power2
// for significant digits already multiplied by 10 ** q.
template <typename binary>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
int hilz = int(w >> 63) ^ 1;
adjusted_mantissa answer;
@@ -83,7 +83,7 @@ adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept
// w * 10 ** q, without rounding the representation up.
// the power2 in the exponent will be adjusted by invalid_am_bias.
template <typename binary>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept {
int lz = leading_zeroes(w);
w <<= lz;
@@ -97,7 +97,7 @@ adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept {
// return an adjusted_mantissa with a negative power of 2: the caller should recompute
// in such cases.
template <typename binary>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
adjusted_mantissa answer;
if ((w == 0) || (q < binary::smallest_power_of_ten())) {

View File

@@ -30,7 +30,7 @@ constexpr static uint64_t powers_of_ten_uint64[] = {
// effect on performance: in order to have a faster algorithm, we'd need
// to slow down performance for faster algorithms, and this is still fast.
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
uint64_t mantissa = num.mantissa;
int32_t exponent = int32_t(num.exponent);
@@ -51,7 +51,7 @@ int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
// this converts a native floating-point number to an extended-precision float.
template <typename T>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
adjusted_mantissa to_extended(T value) noexcept {
using equiv_uint = typename binary_format<T>::equiv_uint;
constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
@@ -84,7 +84,7 @@ adjusted_mantissa to_extended(T value) noexcept {
// we are given a native float that represents b, so we need to adjust it
// halfway between b and b+u.
template <typename T>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
adjusted_mantissa to_extended_halfway(T value) noexcept {
adjusted_mantissa am = to_extended(value);
am.mantissa <<= 1;
@@ -95,7 +95,7 @@ adjusted_mantissa to_extended_halfway(T value) noexcept {
// round an extended-precision float to the nearest machine float.
template <typename T, typename callback>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
void round(adjusted_mantissa& am, callback cb) noexcept {
int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
if (-am.power2 >= mantissa_shift) {
@@ -125,7 +125,7 @@ void round(adjusted_mantissa& am, callback cb) noexcept {
}
template <typename callback>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
const uint64_t mask
= (shift == 64)
@@ -151,7 +151,7 @@ void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) n
am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
if (shift == 64) {
am.mantissa = 0;
@@ -161,7 +161,7 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
am.power2 += shift;
}
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void skip_zeros(UC const * & first, UC const * last) noexcept {
uint64_t val;
while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
@@ -182,7 +182,7 @@ void skip_zeros(UC const * & first, UC const * last) noexcept {
// determine if any non-zero digits were truncated.
// all characters must be valid digits.
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
bool is_truncated(UC const * first, UC const * last) noexcept {
// do 8-bit optimizations, can just compare to 8 literal 0s.
uint64_t val;
@@ -202,22 +202,22 @@ bool is_truncated(UC const * first, UC const * last) noexcept {
return false;
}
// Span overload: forwards to the pointer-pair is_truncated overload using
// s.ptr as the start and s.ptr + s.len() as the end, and returns its result.
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
bool is_truncated(span<const UC> s) noexcept {
return is_truncated(s.ptr, s.ptr + s.len());
}
// char16_t overload with an empty body: all four reference parameters are
// unnamed and none of them is read or modified.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
// currently unused
}
// char32_t overload with an empty body: all four reference parameters are
// unnamed and none of them is read or modified.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
// currently unused
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 100000000 + parse_eight_digits_unrolled(p);
p += 8;
@@ -226,7 +226,7 @@ void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& co
}
template <typename UC>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 10 + limb(*p - UC('0'));
p++;
@@ -234,13 +234,13 @@ void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count)
count++;
}
// Calls big.mul(power) and then big.add(value), in that order.
// NOTE(review): presumably this leaves big == big * power + value — confirm
// against the bigint implementation.
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void add_native(bigint& big, limb power, limb value) noexcept {
big.mul(power);
big.add(value);
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void round_up_bigint(bigint& big, size_t& count) noexcept {
// need to round-up the digits, but need to avoid rounding
// ....9999 to ...10000, which could cause a false halfway point.

View File

@@ -107,12 +107,6 @@ using parse_options = parse_options_t<char>;
#endif
#endif
// Force-inline spelling: __forceinline when
// BOOST_CHARCONV_FASTFLOAT_VISUAL_STUDIO is defined, otherwise
// inline __attribute__((always_inline)).
#ifdef BOOST_CHARCONV_FASTFLOAT_VISUAL_STUDIO
#define fastfloat_really_inline __forceinline
#else
#define fastfloat_really_inline inline __attribute__((always_inline))
#endif
// Fallback used only when BOOST_CHARCONV_FASTFLOAT_ASSERT is not already
// defined: the expression x is evaluated and its value discarded through a
// void cast, so nothing is checked.
#ifndef BOOST_CHARCONV_FASTFLOAT_ASSERT
#define BOOST_CHARCONV_FASTFLOAT_ASSERT(x) { ((void)(x)); }
#endif
@@ -126,7 +120,7 @@ using parse_options = parse_options_t<char>;
namespace boost { namespace charconv { namespace detail { namespace fast_float {
fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
BOOST_FORCEINLINE constexpr bool cpp20_and_in_constexpr() {
#if BOOST_CHARCONV_FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
return std::is_constant_evaluated();
#else
@@ -175,7 +169,7 @@ struct value128 {
};
/* Helper C++11 constexpr generic implementation of leading_zeroes */
fastfloat_really_inline constexpr
BOOST_FORCEINLINE constexpr
int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
return (
((input_num & uint64_t(0xffffffff00000000)) && (input_num >>= 32, last_bit |= 32)),
@@ -189,7 +183,7 @@ int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
}
/* result might be undefined when input_num is zero */
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
int leading_zeroes(uint64_t input_num) {
assert(input_num > 0);
if (cpp20_and_in_constexpr()) {
@@ -211,11 +205,11 @@ int leading_zeroes(uint64_t input_num) {
}
// slow emulation routine for 32-bit
// Returns the product of two 32-bit values as a uint64_t. y is cast to
// uint64_t before the multiplication, so the multiply is carried out in
// 64 bits.
fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
BOOST_FORCEINLINE constexpr uint64_t emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y;
}
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {
uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd);
uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd);
@@ -231,7 +225,7 @@ uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {
// slow emulation routine for 32-bit
#if !defined(__MINGW64__)
// Forwards ab, cd and hi unchanged to umul128_generic and returns its result.
// Only compiled when __MINGW64__ is not defined (see the enclosing #if).
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
return umul128_generic(ab, cd, hi);
}
@@ -241,7 +235,7 @@ uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
// compute 64-bit a*b
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
value128 full_multiplication(uint64_t a, uint64_t b) {
if (cpp20_and_in_constexpr()) {
value128 answer;
@@ -527,7 +521,7 @@ template <> inline constexpr binary_format<double>::equiv_uint
}
template<typename T>
fastfloat_really_inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
void to_float(bool negative, adjusted_mantissa am, T &value) {
using uint = typename binary_format<T>::equiv_uint;
uint word = (uint)am.mantissa;

View File

@@ -89,7 +89,7 @@ parse_infnan(UC const * first, UC const * last, T &value) noexcept {
* It is the default on most systems. This function is meant to be inexpensive.
* Credit : @mwalcott3
*/
fastfloat_really_inline bool rounds_to_nearest() noexcept {
BOOST_FORCEINLINE bool rounds_to_nearest() noexcept {
// https://lemire.me/blog/2020/06/26/gcc-not-nearest/
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
return false;