Review and synchronize Alternate Algos work

This commit is contained in:
Christopher Kormanyos
2023-01-20 18:52:24 +01:00
parent de95f577f8
commit ea2ebdf7b4
7 changed files with 3 additions and 138 deletions

View File

@@ -71,63 +71,6 @@ struct exact_arithmetic
using float_pair = std::pair<float_type, float_type>;
using float_tuple = std::tuple<float_type, float_type, float_type, float_type>;
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
#else
constexpr
#endif
float_pair split(const float_type& a)
{
// Split a floating point number in two (high and low) parts approximating the
// upper-half and lower-half bits of the float
static_assert(is_floating_point_or_float128<FloatingPointType>::value,
"Error: exact_arithmetic<>::split invoked with unknown floating-point type");
// TODO Replace bit shifts with constexpr funcs or ldexp for better compaitibility
constexpr int MantissaBits = cpp_df_qf_detail::ccmath::numeric_limits<float_type>::digits;
constexpr int SplitBits = MantissaBits / 2 + 1;
// Check if the integer is wide enough to hold the Splitter.
static_assert(std::numeric_limits<std::uintmax_t>::digits > SplitBits,
"Inadequate integer width for binary shifting needed in split(), try using ldexp instead");
// If the above line gives an compilation error, replace the
// line below it with the commented line
constexpr float_type Splitter = FloatingPointType((static_cast<std::uintmax_t>(UINT8_C(1)) << SplitBits) + 1);
const float_type SplitThreshold = (cpp_df_qf_detail::ccmath::numeric_limits<float_type>::max)() / (Splitter * 2);
float_type hi { };
float_type lo { };
// Handle if multiplication with the splitter would cause overflow
if (a > SplitThreshold || a < -SplitThreshold)
{
constexpr float_type Normalizer = float_type(1ULL << (SplitBits + 1));
const float_type a_ = a / Normalizer;
const float_type temp = Splitter * a_;
hi = temp - (temp - a_);
lo = a_ - hi;
hi *= Normalizer;
lo *= Normalizer;
}
else
{
const float_type temp = Splitter * a;
hi = temp - (temp - a);
lo = a - hi;
}
return std::make_pair(hi, lo);
}
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
@@ -144,87 +87,6 @@ struct exact_arithmetic
return result;
}
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
#else
constexpr
#endif
void sum(float_pair& result, float_type a, float_type b)
{
// Exact addition of two floating point numbers
const float_type a_plus_b = a + b;
const float_type v = a_plus_b - a;
const float_pair tmp(a_plus_b, (a - (a_plus_b - v)) + (b - v));
result.first = tmp.first;
result.second = tmp.second;
}
#if 0
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
#else
constexpr
#endif
void sum(float_pair& p, float_type& e)
{
using std::tie;
float_pair t;
float_type t_;
t = sum(p.first, p.second);
tie(p.first, t_) = sum(e, t.first);
tie(p.second, e) = sum(t.second, t_);
}
#endif
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
#else
constexpr
#endif
float_pair difference(const float_type& a, const float_type& b)
{
// Exact subtraction of two floating point numbers
const float_type a_minus_b = a - b;
const float_type v = a_minus_b - a;
return float_pair(a_minus_b, (a - (a_minus_b - v)) - (b + v));
}
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR
#else
constexpr
#endif
float_pair product(const float_type& a, const float_type& b)
{
// Exact product of two floating point numbers
const float_pair a_split = split(a);
const float_pair b_split = split(b);
const volatile float_type pf = a * b;
return
std::make_pair
(
const_cast<const float_type&>(pf),
(
((a_split.first * b_split.first) - const_cast<const float_type&>(pf))
+ (a_split.first * b_split.second)
+ (a_split.second * b_split.first)
)
+
(a_split.second * b_split.second)
);
}
static
#if (defined(_MSC_VER) && (_MSC_VER <= 1900))
BOOST_MP_CXX14_CONSTEXPR

View File

@@ -246,6 +246,9 @@ class cpp_double_fp_backend
static constexpr int my_max_exponent10 = static_cast<int>(static_cast<float>(my_max_exponent) * 0.301F);
static constexpr int my_min_exponent10 = static_cast<int>(static_cast<float>(my_min_exponent) * 0.301F);
// TBD: Did we justify this static assertion during the GSoC?
// Does anyone remember what the meaning of the number 77 is?
static_assert(((my_max_exponent - my_digits) >= 77),
"Error: floating-point constituent does not have wide enough exponent range");