2
0
mirror of https://github.com/boostorg/math.git synced 2026-01-28 19:32:08 +00:00

Continued work on tgamma() for multiprecision.

This commit is contained in:
Christopher Kormanyos
2014-01-02 00:08:21 +01:00
parent 825c2b97fd
commit 03e4dbc8c6
2 changed files with 166 additions and 218 deletions

View File

@@ -1,20 +1,21 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright 2013 John Maddock
// Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// Copyright 2013 - 2014 John Maddock
// Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_MATH_BERNOULLI_DETAIL_HPP
#define BOOST_MATH_BERNOULLI_DETAIL_HPP
#ifndef BOOST_MATH_BERNOULLI_DETAILS_HPP
#define BOOST_MATH_BERNOULLI_DETAILS_HPP
#include <boost/config.hpp>
#include <boost/detail/lightweight_mutex.hpp>
#include <boost/math/tools/fixed_capacity_heap_vector.hpp>
#ifdef BOOST_HAS_THREADS
#ifndef BOOST_NO_CXX11_HDR_ATOMIC
# include <atomic>
# define BOOST_MATH_ATOMIC_NS std
# include <atomic>
# define BOOST_MATH_ATOMIC_NS std
#if ATOMIC_INT_LOCK_FREE == 2
typedef std::atomic<int> atomic_counter_type;
#elif ATOMIC_SHORT_LOCK_FREE == 2
@@ -24,7 +25,7 @@ typedef std::atomic<long> atomic_counter_type;
#elif ATOMIC_LLONG_LOCK_FREE == 2
typedef std::atomic<long long> atomic_counter_type;
#else
# define BOOST_MATH_NO_ATOMIC_INT
# define BOOST_MATH_NO_ATOMIC_INT
#endif
#else // BOOST_NO_CXX11_HDR_ATOMIC
@@ -34,7 +35,7 @@ typedef std::atomic<long long> atomic_counter_type;
//
#define BOOST_ATOMIC_NO_LIB
#include <boost/atomic.hpp>
# define BOOST_MATH_ATOMIC_NS boost
# define BOOST_MATH_ATOMIC_NS boost
namespace boost{ namespace math{ namespace detail{
@@ -50,12 +51,12 @@ typedef boost::atomic<long> atomic_counter_type;
#elif BOOST_ATOMIC_LLONG_LOCK_FREE == 2
typedef boost::atomic<long long> atomic_counter_type;
#else
# define BOOST_MATH_NO_ATOMIC_INT
# define BOOST_MATH_NO_ATOMIC_INT
#endif
}}} // namespaces
#endif // BOOST_NO_CXX11_HDR_ATOMIC
#endif // BOOST_NO_CXX11_HDR_ATOMIC
#endif // BOOST_HAS_THREADS
@@ -71,7 +72,7 @@ namespace boost{ namespace math{ namespace detail{
template <class T>
struct bernoulli_overflow_variant
{
static const unsigned value =
static const unsigned value =
(std::numeric_limits<T>::max_exponent == 128)
&& (std::numeric_limits<T>::radix == 2) ? 1 :
(
@@ -85,98 +86,120 @@ struct bernoulli_overflow_variant
};
template<class T>
inline bool bernouli_impl_index_does_overflow(std::size_t, const mpl::int_<0>&)
T bernouli_impl_index_might_overflow_function(const T& nx)
{
return false; // We have no idea, perform a runtime check
}
// There are certain cases for which the index n, when trying
// to compute Bn, is known to overflow. In particular, when
// using 32-bit float and 64-bit double (IEEE 754 conformant),
// overflow will occur if the index exceeds the amount allowed
// in the tables of Bn.
template<class T>
inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<1>&)
{
// This corresponds to 4-byte float, IEEE 745 conformant.
return n >= max_bernoulli_index<1>::value * 2;
}
template<class T>
inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<2>&)
{
// This corresponds to 8-byte float, IEEE 745 conformant.
return n >= max_bernoulli_index<2>::value * 2;
}
template<class T>
inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<3>&)
{
// This corresponds to 16-byte float, IEEE 745 conformant.
return n >= max_bernoulli_index<3>::value * 2;
}
template<class T>
inline bool bernouli_impl_index_does_overflow(std::size_t n)
{
typedef mpl::int_<bernoulli_overflow_variant<T>::value> tag_type;
return bernouli_impl_index_does_overflow<T>(n, tag_type());
}
template<class T>
bool bernouli_impl_index_might_overflow(std::size_t n)
{
if(bernouli_impl_index_does_overflow<T>(n))
{
// If the index *does* overflow, then it also *might* overflow.
return true;
}
BOOST_MATH_STD_USING
// Luschny LogB3(n) formula.
const T nx (n);
const T nx2(nx * nx);
const T t_one(1);
const T approximate_log_of_bernoulli_bn
= ((boost::math::constants::half<T>() + nx) * log(nx))
= ((boost::math::constants::half<T>() + nx) * log(nx))
+ ((boost::math::constants::half<T>() - nx) * log(boost::math::constants::pi<T>()))
+ (((T(3) / 2) - nx) * boost::math::constants::ln_two<T>())
+ ((nx * (T(2) - (nx2 * 7) * (t_one + ((nx2 * 30) * ((nx2 * 12) - t_one))))) / (((nx2 * nx2) * nx2) * 2520));
return ((approximate_log_of_bernoulli_bn * 1.1F) > boost::math::tools::log_max_value<T>());
return (approximate_log_of_bernoulli_bn - boost::math::tools::log_max_value<T>());
}
template<class T>
T bernouli_impl_index_might_overflow_tolerance(const T& min_value, const T& max_value)
{
BOOST_MATH_STD_USING
return abs(max_value - min_value) < T(0.9F);
}
template<class T>
inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<0>&)
{
// We have no idea, perform a runtime check
BOOST_MATH_STD_USING
// Find the index of B2n that will overflow the numeric type.
// Use Luschny's order-3 approximation of the logarithm of
// Bernoulli numbers in combination with a root-finding tool
// from boost::math::tools for this.
boost::uintmax_t number_of_iterations = 128U;
const std::pair<T, T> index_might_overflow_root =
boost::math::tools::toms748_solve(bernouli_impl_index_might_overflow_function<T>,
T(20),
T((std::numeric_limits<std::size_t>::max)()),
bernouli_impl_index_might_overflow_tolerance<T>,
number_of_iterations);
// Compute the index that might overflow by averaging the bracketed
// values in the root pair.
const T index_might_overflow_value = (index_might_overflow_root.first + index_might_overflow_root.second) / 2;
// Determine a conservative estimate of the size_t integral value
// of the index that might. Do this by evaluating the binary exponent
// of the index that might overflow expressed as a floating-point number.
int p2;
frexp(index_might_overflow_value, &p2);
// Armed with the binary exponent, we left shift by an amount
// one less than this binary exponent such that the value of
// the index that might overflow is guessed conservatively.
const std::size_t index_might_overflow_conservative_value = static_cast<std::size_t>(static_cast<std::size_t>(1U) << (p2 - 1));
// Example: A floating-point numeric type with a 32-bit signed
// exponent overflows at an index of approximately 11,513,000.
// The result of the conservative value for the overflow index
// is approximately 8,389,000.
return index_might_overflow_conservative_value;
}
// There are certain cases for which the index n, when trying
// to compute Bn, is known to overflow. In particular, when
// using IEEE 754 conformant 32-bit float, 64-bit double,
// or 128-bit quad, overflow will occur if the index exceeds
// the amount allowed in the tables of Bn.
template<class T>
inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<1>&)
{
// This corresponds to 4-byte float, IEEE 745 conformant.
return max_bernoulli_index<1>::value;
}
template<class T>
inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<2>&)
{
// This corresponds to 8-byte float, IEEE 745 conformant.
return max_bernoulli_index<2>::value;
}
template<class T>
inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<3>&)
{
// This corresponds to 16-byte float, IEEE 745 conformant.
return max_bernoulli_index<3>::value;
}
template<class T>
inline std::size_t bernouli_impl_index_might_overflow()
{
typedef mpl::int_<bernoulli_overflow_variant<T>::value> tag_type;
return bernouli_impl_index_might_overflow<T>(tag_type());
}
template<class T>
std::size_t possible_overflow_index()
{
// We use binary search to determine a good approximation for an index that might overflow.
// We use binary search to determine a good approximation for
// an index that might overflow.
// This code is called ONCE ONLY for each T at program startup.
std::size_t upper_limit = (std::min)(static_cast<std::size_t>(boost::math::itrunc(tools::log_max_value<T>() / (2 * constants::ln_two<T>()) - 2)), static_cast<std::size_t>(10000u));
std::size_t lower_limit = 1;
const std::size_t the_possible_overflow_index = bernouli_impl_index_might_overflow<T>();
if(bernouli_impl_index_might_overflow<T>(upper_limit * 2) == 0)
{
return upper_limit;
}
while(upper_limit > (lower_limit + 4))
{
const int mid = (upper_limit + lower_limit) / 2;
if(bernouli_impl_index_might_overflow<T>(mid * 2) == 0)
{
lower_limit = mid;
}
else
{
upper_limit = mid;
}
}
return lower_limit;
return the_possible_overflow_index;
}
//
// The tangent numbers grow larger much more rapidly than the Bernoulli numbers do....
@@ -226,68 +249,21 @@ struct bernoulli_initializer
template <class T, class Policy>
const typename bernoulli_initializer<T, Policy>::init bernoulli_initializer<T, Policy>::initializer;
//
// We need something to act as a cache for our calculated Bernoulli numbers. In order to
// ensure both fast access and thread safety, we need a stable table which may be extended
// in size, but which never reallocates: that way values already calculated may be accessed
// concurrently with another thread extending the table with new values.
//
// Very very simple vector class that will never allocate more than once, we could use
// boost::container::static_vector here, but that allocates on the stack, which may well
// cause issues for the amount of memory we want in the extreme case...
//
template <class T>
struct fixed_vector : private std::allocator<T>
{
typedef unsigned size_type;
typedef T* iterator;
typedef const T* const_iterator;
fixed_vector() : m_used(0)
{
std::size_t overflow_limit = 100 + 3 * possible_overflow_index<T>();
m_capacity = static_cast<size_type>((std::min)(overflow_limit, static_cast<std::size_t>(100000u)));
m_data = this->allocate(m_capacity);
}
~fixed_vector()
{
for(unsigned i = 0; i < m_used; ++i)
this->destroy(&m_data[i]);
this->deallocate(m_data, m_capacity);
}
T& operator[](unsigned n) { BOOST_ASSERT(n < m_used); return m_data[n]; }
const T& operator[](unsigned n)const { BOOST_ASSERT(n < m_used); return m_data[n]; }
unsigned size()const { return m_used; }
unsigned size() { return m_used; }
void resize(unsigned n, const T& val)
{
if(n > m_capacity)
throw std::runtime_error("Exhausted storage for Bernoulli numbers.");
for(unsigned i = m_used; i < n; ++i)
new (m_data + i) T(val);
m_used = n;
}
void resize(unsigned n) { resize(n, T()); }
T* begin() { return m_data; }
T* end() { return m_data + m_used; }
T* begin()const { return m_data; }
T* end()const { return m_data + m_used; }
private:
T* m_data;
unsigned m_used, m_capacity;
};
template <class T, class Policy>
class bernoulli_numbers_cache
{
public:
bernoulli_numbers_cache() : m_overflow_limit((std::numeric_limits<std::size_t>::max)())
typedef boost::math::tools::fixed_capacity_heap_vector<T> container_type;
bernoulli_numbers_cache() : bn (static_cast<container_type::size_type>(100U + (2U * possible_overflow_index<T>()))),
tn (static_cast<container_type::size_type>(100U + (2U * possible_overflow_index<T>()))),
m_overflow_limit((std::numeric_limits<std::size_t>::max)()),
m_intermediates ()
#if defined(BOOST_HAS_THREADS) && !defined(BOOST_MATH_NO_ATOMIC_INT)
, m_counter(0)
#endif
{}
typedef fixed_vector<T> container_type;
void tangent(std::size_t m)
{
static const std::size_t min_overflow_index = possible_overflow_index<T>();
@@ -310,32 +286,34 @@ public:
for(std::size_t i = std::max<size_t>(2, prev_size); i < m; i++)
{
bool overflow_check = false;
if(i >= min_overflow_index && (boost::math::tools::max_value<T>() / (i-1) < m_intermediates[1]) )
{
std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value<T>());
break;
}
m_intermediates[1] = m_intermediates[1] * (i-1);
for(std::size_t j = 2; j <= i; j++)
for(std::size_t j = 2U; j <= i; ++j)
{
overflow_check =
(i >= min_overflow_index) && (
(boost::math::tools::max_value<T>() / (i - j) < m_intermediates[j])
|| (boost::math::tools::max_value<T>() / (i - j + 2) < m_intermediates[j-1])
|| (boost::math::tools::max_value<T>() - m_intermediates[j] * (i - j) < m_intermediates[j-1] * (i - j + 2))
|| ((boost::math::isinf)(m_intermediates[j]))
);
overflow_check = ( (i >= min_overflow_index)
&& (boost::math::tools::max_value<T>() / (i - j) < m_intermediates[j]));
if(overflow_check)
{
std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value<T>());
break;
}
m_intermediates[j] = m_intermediates[j] * (i - j) + m_intermediates[j-1] * (i - j + 2);
m_intermediates[j] = (m_intermediates[j] * (i - j)) + (m_intermediates[j - 1] * (i - j + 2));
}
if(overflow_check)
break; // already filled the tn...
tn[static_cast<container_type::size_type>(i)] = m_intermediates[i];
BOOST_MATH_INSTRUMENT_VARIABLE(i);
BOOST_MATH_INSTRUMENT_VARIABLE(tn[i]);
}
@@ -367,16 +345,19 @@ public:
// the calculation, but we also need to avoid overflow altogether in case
// we're calculating with a type where "bad things" happen in that case:
//
b = b / (power_two * tangent_scale_factor<T>());
b = b / (power_two * tangent_scale_factor<T>());
b /= (power_two - 1);
bool overflow_check = (i >= min_overflow_index) && (tools::max_value<T>() / tn[static_cast<container_type::size_type>(i)] < b);
const bool overflow_check = ( (i >= min_overflow_index)
&& (tools::max_value<T>() / tn[static_cast<container_type::size_type>(i)] < b));
if(overflow_check)
{
m_overflow_limit = i;
while(i < m)
{
b = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : tools::max_value<T>();
bn[static_cast<container_type::size_type>(i)] = ((i % 2) ? b : -b);
bn[static_cast<container_type::size_type>(i)] = ((i % 2U) ? b : -b);
++i;
}
break;
@@ -401,13 +382,13 @@ public:
// There are basically 3 thread safety options:
//
// 1) There are no threads (BOOST_HAS_THREADS is not defined).
// 2) There are threads, but we do not have a true atomic integer type,
// in this case we just use a mutex to guard against race conditions.
// 2) There are threads, but we do not have a true atomic integer type,
// in this case we just use a mutex to guard against race conditions.
// 3) There are threads, and we have an atomic integer: in this case we can
// use the double-checked locking pattern to avoid thread synchronisation
// when accessing values already in the cache.
// use the double-checked locking pattern to avoid thread synchronisation
// when accessing values already in the cache.
//
#if !defined(BOOST_HAS_THREADS)
#if !defined(BOOST_HAS_THREADS)
//
// Single threaded code, very simple:
//
@@ -422,7 +403,7 @@ public:
*out = (i >= m_overflow_limit) ? policies::raise_overflow_error<T>("boost::math::bernoulli<%1%>(std::size_t)", 0, pol) : bn[i];
++out;
}
#elif defined(BOOST_MATH_NO_ATOMIC_INT)
#elif defined(BOOST_MATH_NO_ATOMIC_INT)
//
// We need to grab a mutex every time we get here, for both readers and writers:
//
@@ -439,7 +420,7 @@ public:
++out;
}
#else
#else
//
// Double-checked locking pattern, lets us access cached already cached values
// without locking:
@@ -467,7 +448,7 @@ public:
++out;
}
#endif
#endif
return out;
}
@@ -478,13 +459,13 @@ public:
// There are basically 3 thread safety options:
//
// 1) There are no threads (BOOST_HAS_THREADS is not defined).
// 2) There are threads, but we do not have a true atomic integer type,
// in this case we just use a mutex to guard against race conditions.
// 2) There are threads, but we do not have a true atomic integer type,
// in this case we just use a mutex to guard against race conditions.
// 3) There are threads, and we have an atomic integer: in this case we can
// use the double-checked locking pattern to avoid thread synchronisation
// when accessing values already in the cache.
// use the double-checked locking pattern to avoid thread synchronisation
// when accessing values already in the cache.
//
#if !defined(BOOST_HAS_THREADS)
#if !defined(BOOST_HAS_THREADS)
//
// Single threaded code, very simple:
//
@@ -507,7 +488,7 @@ public:
}
++out;
}
#elif defined(BOOST_MATH_NO_ATOMIC_INT)
#elif defined(BOOST_MATH_NO_ATOMIC_INT)
//
// We need to grab a mutex every time we get here, for both readers and writers:
//
@@ -532,7 +513,7 @@ public:
++out;
}
#else
#else
//
// Double-checked locking pattern, lets us access cached already cached values
// without locking:
@@ -568,7 +549,7 @@ public:
++out;
}
#endif
#endif
return out;
}
@@ -578,10 +559,14 @@ private:
// these must NEVER EVER reallocate as it breaks our thread
// safety guarentees:
//
fixed_vector<T> bn, tn;
container_type bn;
container_type tn;
std::vector<T> m_intermediates;
// The value at which we know overflow has already occured for the Bn:
std::size_t m_overflow_limit;
#if !defined(BOOST_HAS_THREADS)
#elif defined(BOOST_MATH_NO_ATOMIC_INT)
boost::detail::lightweight_mutex m_mutex;
@@ -605,4 +590,4 @@ inline bernoulli_numbers_cache<T, Policy>& get_bernoulli_numbers_cache()
}}}
#endif // BOOST_MATH_BERNOULLI_DETAIL_HPP
#endif // BOOST_MATH_BERNOULLI_DETAILS_HPP

View File

@@ -115,52 +115,25 @@ template<class T>
std::size_t highest_bernoulli_index()
{
// Find the high index n for Bn to produce the desired precision in Stirling's calculation.
return static_cast<std::size_t>(18.0F + (0.65F * static_cast<float>(std::numeric_limits<T>::digits10)));
return static_cast<std::size_t>(18.0F + (0.6F * static_cast<float>(std::numeric_limits<T>::digits10)));
}
template <class T, class Policy>
T gamma_imp_bernoulli(T x, const Policy& pol)
{
T result = 1;
BOOST_MATH_STD_USING
//the following handling of negative arguments modelled after the handling in function
//T gamma_imp(T z, const Policy& pol, const Lanczos& l)
if(x <= 0)
const bool b_neg = (x < 0);
if(b_neg)
{
static const char* function = "boost::math::tgamma<%1%>(%1%)";
if(floor(x) == x)
return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", x, pol);
if(x <= -20)
{
result = gamma_imp_bernoulli(T(-x), pol) * sinpx(x);
BOOST_MATH_INSTRUMENT_VARIABLE(result);
if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
return policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
result = -boost::math::constants::pi<T>() / result;
if(result == 0)
return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL)
return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
BOOST_MATH_INSTRUMENT_VARIABLE(result);
return result;
}
while(x < 0)
{
result /= x;
x += 1;
}
// TBD: Restore error handling.
}
// Make a local, unsigned copy of the input argument.
T xx(x);
T original_x(xx);
T xx((!b_neg) ? x : -x);
// Scale the argument up and use downward recursion later for the final result.
const T min_arg_for_recursion(static_cast<float>(std::numeric_limits<T>::digits10) * 1.7F);
int n_recur;
@@ -168,23 +141,20 @@ T gamma_imp_bernoulli(T x, const Policy& pol)
if(xx < min_arg_for_recursion)
{
n_recur = boost::math::ltrunc(min_arg_for_recursion - xx) + 1;
xx += n_recur;
}
else
{
n_recur = 0;
}
if(n_recur != 0)
{
xx += n_recur;
}
const std::size_t number_of_bernoullis_b2n = highest_bernoulli_index<T>();
std::vector<T> bernoulli_table(number_of_bernoullis_b2n);
boost::math::bernoulli_b2n<T>(0,
static_cast<unsigned>(number_of_bernoullis_b2n),
static_cast<unsigned>(bernoulli_table.size()),
bernoulli_table.begin());
T one_over_x_pow_two_n_minus_one = 1 / xx;
@@ -210,22 +180,15 @@ T gamma_imp_bernoulli(T x, const Policy& pol)
T gamma_value = exp(log_gamma_value);
// Rescale the result using downward recursion if necessary.
if(n_recur != 0)
// Rescale the result using downward recursion if necessary.
for(int k = 0; k < n_recur; ++k)
{
// We need to divide by every x+k in the range [x, x+n_recur), we save
// division by x till last, as we may have x < 1 which could cause
// spurious overflow if we divided by that first.
for(int k = 1; k < n_recur; k++)
{
gamma_value /= (original_x + k);
}
gamma_value /= original_x;
xx -= 1;
gamma_value /= xx;
}
// Return the result, accounting for possible negative arguments.
return result * gamma_value;
return ((!b_neg) ? gamma_value : -boost::math::constants::pi<T>() / (xx * gamma_value * sin(boost::math::constants::pi<T>() * xx)));
}
template<class T, class Policy>