diff --git a/include/boost/math/special_functions/detail/bernoulli_details.hpp b/include/boost/math/special_functions/detail/bernoulli_details.hpp index fa50dd7c8..9c6c61646 100644 --- a/include/boost/math/special_functions/detail/bernoulli_details.hpp +++ b/include/boost/math/special_functions/detail/bernoulli_details.hpp @@ -1,20 +1,21 @@ /////////////////////////////////////////////////////////////////////////////// -// Copyright 2013 John Maddock -// Distributed under the Boost -// Software License, Version 1.0. (See accompanying file -// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// Copyright 2013 - 2014 John Maddock +// Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_MATH_BERNOULLI_DETAIL_HPP -#define BOOST_MATH_BERNOULLI_DETAIL_HPP +#ifndef BOOST_MATH_BERNOULLI_DETAILS_HPP +#define BOOST_MATH_BERNOULLI_DETAILS_HPP #include #include +#include #ifdef BOOST_HAS_THREADS #ifndef BOOST_NO_CXX11_HDR_ATOMIC -# include -# define BOOST_MATH_ATOMIC_NS std +# include +# define BOOST_MATH_ATOMIC_NS std #if ATOMIC_INT_LOCK_FREE == 2 typedef std::atomic atomic_counter_type; #elif ATOMIC_SHORT_LOCK_FREE == 2 @@ -24,7 +25,7 @@ typedef std::atomic atomic_counter_type; #elif ATOMIC_LLONG_LOCK_FREE == 2 typedef std::atomic atomic_counter_type; #else -# define BOOST_MATH_NO_ATOMIC_INT +# define BOOST_MATH_NO_ATOMIC_INT #endif #else // BOOST_NO_CXX11_HDR_ATOMIC @@ -34,7 +35,7 @@ typedef std::atomic atomic_counter_type; // #define BOOST_ATOMIC_NO_LIB #include -# define BOOST_MATH_ATOMIC_NS boost +# define BOOST_MATH_ATOMIC_NS boost namespace boost{ namespace math{ namespace detail{ @@ -50,12 +51,12 @@ typedef boost::atomic atomic_counter_type; #elif BOOST_ATOMIC_LLONG_LOCK_FREE == 2 typedef boost::atomic atomic_counter_type; #else -# define BOOST_MATH_NO_ATOMIC_INT +# define BOOST_MATH_NO_ATOMIC_INT #endif }}} // namespaces -#endif // BOOST_NO_CXX11_HDR_ATOMIC +#endif // BOOST_NO_CXX11_HDR_ATOMIC #endif // BOOST_HAS_THREADS @@ -71,7 +72,7 @@ namespace boost{ namespace math{ namespace detail{ template struct bernoulli_overflow_variant { - static const unsigned value = + static const unsigned value = (std::numeric_limits::max_exponent == 128) && (std::numeric_limits::radix == 2) ? 1 : ( @@ -85,98 +86,120 @@ struct bernoulli_overflow_variant }; template -inline bool bernouli_impl_index_does_overflow(std::size_t, const mpl::int_<0>&) +T bernouli_impl_index_might_overflow_function(const T& nx) { - return false; // We have no idea, perform a runtime check -} - -// There are certain cases for which the index n, when trying -// to compute Bn, is known to overflow. In particular, when -// using 32-bit float and 64-bit double (IEEE 754 conformant), -// overflow will occur if the index exceeds the amount allowed -// in the tables of Bn. -template -inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<1>&) -{ - // This corresponds to 4-byte float, IEEE 745 conformant. - return n >= max_bernoulli_index<1>::value * 2; -} -template -inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<2>&) -{ - // This corresponds to 8-byte float, IEEE 745 conformant. - return n >= max_bernoulli_index<2>::value * 2; -} -template -inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<3>&) -{ - // This corresponds to 16-byte float, IEEE 745 conformant. - return n >= max_bernoulli_index<3>::value * 2; -} - -template -inline bool bernouli_impl_index_does_overflow(std::size_t n) -{ - typedef mpl::int_::value> tag_type; - return bernouli_impl_index_does_overflow(n, tag_type()); -} - -template -bool bernouli_impl_index_might_overflow(std::size_t n) -{ - if(bernouli_impl_index_does_overflow(n)) - { - // If the index *does* overflow, then it also *might* overflow. - return true; - } - BOOST_MATH_STD_USING // Luschny LogB3(n) formula. - const T nx (n); const T nx2(nx * nx); const T t_one(1); const T approximate_log_of_bernoulli_bn - = ((boost::math::constants::half() + nx) * log(nx)) + = ((boost::math::constants::half() + nx) * log(nx)) + ((boost::math::constants::half() - nx) * log(boost::math::constants::pi())) + (((T(3) / 2) - nx) * boost::math::constants::ln_two()) + ((nx * (T(2) - (nx2 * 7) * (t_one + ((nx2 * 30) * ((nx2 * 12) - t_one))))) / (((nx2 * nx2) * nx2) * 2520)); - return ((approximate_log_of_bernoulli_bn * 1.1F) > boost::math::tools::log_max_value()); + return (approximate_log_of_bernoulli_bn - boost::math::tools::log_max_value()); +} + +template +T bernouli_impl_index_might_overflow_tolerance(const T& min_value, const T& max_value) +{ + BOOST_MATH_STD_USING + + return abs(max_value - min_value) < T(0.9F); +} + +template +inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<0>&) +{ + // We have no idea, perform a runtime check + + BOOST_MATH_STD_USING + + // Find the index of B2n that will overflow the numeric type. + // Use Luschny's order-3 approximation of the logarithm of + // Bernoulli numbers in combination with a root-finding tool + // from boost::math::tools for this. + + boost::uintmax_t number_of_iterations = 128U; + + const std::pair index_might_overflow_root = + boost::math::tools::toms748_solve(bernouli_impl_index_might_overflow_function, + T(20), + T((std::numeric_limits::max)()), + bernouli_impl_index_might_overflow_tolerance, + number_of_iterations); + + // Compute the index that might overflow by averaging the bracketed + // values in the root pair. + const T index_might_overflow_value = (index_might_overflow_root.first + index_might_overflow_root.second) / 2; + + // Determine a conservative estimate of the size_t integral value + // of the index that might. Do this by evaluating the binary exponent + // of the index that might overflow expressed as a floating-point number. + int p2; + frexp(index_might_overflow_value, &p2); + + // Armed with the binary exponent, we left shift by an amount + // one less than this binary exponent such that the value of + // the index that might overflow is guessed conservatively. + + const std::size_t index_might_overflow_conservative_value = static_cast(static_cast(1U) << (p2 - 1)); + + // Example: A floating-point numeric type with a 32-bit signed + // exponent overflows at an index of approximately 11,513,000. + // The result of the conservative value for the overflow index + // is approximately 8,389,000. + + return index_might_overflow_conservative_value; +} + +// There are certain cases for which the index n, when trying +// to compute Bn, is known to overflow. In particular, when +// using IEEE 754 conformant 32-bit float, 64-bit double, +// or 128-bit quad, overflow will occur if the index exceeds +// the amount allowed in the tables of Bn. +template +inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<1>&) +{ + // This corresponds to 4-byte float, IEEE 745 conformant. + return max_bernoulli_index<1>::value; +} +template +inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<2>&) +{ + // This corresponds to 8-byte float, IEEE 745 conformant. + return max_bernoulli_index<2>::value; +} +template +inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<3>&) +{ + // This corresponds to 16-byte float, IEEE 745 conformant. + return max_bernoulli_index<3>::value; +} + +template +inline std::size_t bernouli_impl_index_might_overflow() +{ + typedef mpl::int_::value> tag_type; + + return bernouli_impl_index_might_overflow(tag_type()); } template std::size_t possible_overflow_index() { - // We use binary search to determine a good approximation for an index that might overflow. + // We use binary search to determine a good approximation for + // an index that might overflow. // This code is called ONCE ONLY for each T at program startup. - std::size_t upper_limit = (std::min)(static_cast(boost::math::itrunc(tools::log_max_value() / (2 * constants::ln_two()) - 2)), static_cast(10000u)); - std::size_t lower_limit = 1; + const std::size_t the_possible_overflow_index = bernouli_impl_index_might_overflow(); - if(bernouli_impl_index_might_overflow(upper_limit * 2) == 0) - { - return upper_limit; - } - - while(upper_limit > (lower_limit + 4)) - { - const int mid = (upper_limit + lower_limit) / 2; - - if(bernouli_impl_index_might_overflow(mid * 2) == 0) - { - lower_limit = mid; - } - else - { - upper_limit = mid; - } - } - - return lower_limit; + return the_possible_overflow_index; } // // The tangent numbers grow larger much more rapidly than the Bernoulli numbers do.... @@ -226,68 +249,21 @@ struct bernoulli_initializer template const typename bernoulli_initializer::init bernoulli_initializer::initializer; -// -// We need something to act as a cache for our calculated Bernoulli numbers. In order to -// ensure both fast access and thread safety, we need a stable table which may be extended -// in size, but which never reallocates: that way values already calculated may be accessed -// concurrently with another thread extending the table with new values. -// -// Very very simple vector class that will never allocate more than once, we could use -// boost::container::static_vector here, but that allocates on the stack, which may well -// cause issues for the amount of memory we want in the extreme case... -// -template -struct fixed_vector : private std::allocator -{ - typedef unsigned size_type; - typedef T* iterator; - typedef const T* const_iterator; - fixed_vector() : m_used(0) - { - std::size_t overflow_limit = 100 + 3 * possible_overflow_index(); - m_capacity = static_cast((std::min)(overflow_limit, static_cast(100000u))); - m_data = this->allocate(m_capacity); - } - ~fixed_vector() - { - for(unsigned i = 0; i < m_used; ++i) - this->destroy(&m_data[i]); - this->deallocate(m_data, m_capacity); - } - T& operator[](unsigned n) { BOOST_ASSERT(n < m_used); return m_data[n]; } - const T& operator[](unsigned n)const { BOOST_ASSERT(n < m_used); return m_data[n]; } - unsigned size()const { return m_used; } - unsigned size() { return m_used; } - void resize(unsigned n, const T& val) - { - if(n > m_capacity) - throw std::runtime_error("Exhausted storage for Bernoulli numbers."); - for(unsigned i = m_used; i < n; ++i) - new (m_data + i) T(val); - m_used = n; - } - void resize(unsigned n) { resize(n, T()); } - T* begin() { return m_data; } - T* end() { return m_data + m_used; } - T* begin()const { return m_data; } - T* end()const { return m_data + m_used; } -private: - T* m_data; - unsigned m_used, m_capacity; -}; - template class bernoulli_numbers_cache { public: - bernoulli_numbers_cache() : m_overflow_limit((std::numeric_limits::max)()) + typedef boost::math::tools::fixed_capacity_heap_vector container_type; + + bernoulli_numbers_cache() : bn (static_cast(100U + (2U * possible_overflow_index()))), + tn (static_cast(100U + (2U * possible_overflow_index()))), + m_overflow_limit((std::numeric_limits::max)()), + m_intermediates () #if defined(BOOST_HAS_THREADS) && !defined(BOOST_MATH_NO_ATOMIC_INT) , m_counter(0) #endif {} - typedef fixed_vector container_type; - void tangent(std::size_t m) { static const std::size_t min_overflow_index = possible_overflow_index(); @@ -310,32 +286,34 @@ public: for(std::size_t i = std::max(2, prev_size); i < m; i++) { bool overflow_check = false; + if(i >= min_overflow_index && (boost::math::tools::max_value() / (i-1) < m_intermediates[1]) ) { std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value()); break; } + m_intermediates[1] = m_intermediates[1] * (i-1); - for(std::size_t j = 2; j <= i; j++) + + for(std::size_t j = 2U; j <= i; ++j) { - overflow_check = - (i >= min_overflow_index) && ( - (boost::math::tools::max_value() / (i - j) < m_intermediates[j]) - || (boost::math::tools::max_value() / (i - j + 2) < m_intermediates[j-1]) - || (boost::math::tools::max_value() - m_intermediates[j] * (i - j) < m_intermediates[j-1] * (i - j + 2)) - || ((boost::math::isinf)(m_intermediates[j])) - ); + overflow_check = ( (i >= min_overflow_index) + && (boost::math::tools::max_value() / (i - j) < m_intermediates[j])); if(overflow_check) { std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value()); break; } - m_intermediates[j] = m_intermediates[j] * (i - j) + m_intermediates[j-1] * (i - j + 2); + + m_intermediates[j] = (m_intermediates[j] * (i - j)) + (m_intermediates[j - 1] * (i - j + 2)); } + if(overflow_check) break; // already filled the tn... + tn[static_cast(i)] = m_intermediates[i]; + BOOST_MATH_INSTRUMENT_VARIABLE(i); BOOST_MATH_INSTRUMENT_VARIABLE(tn[i]); } @@ -367,16 +345,19 @@ public: // the calculation, but we also need to avoid overflow altogether in case // we're calculating with a type where "bad things" happen in that case: // - b = b / (power_two * tangent_scale_factor()); + b = b / (power_two * tangent_scale_factor()); b /= (power_two - 1); - bool overflow_check = (i >= min_overflow_index) && (tools::max_value() / tn[static_cast(i)] < b); + + const bool overflow_check = ( (i >= min_overflow_index) + && (tools::max_value() / tn[static_cast(i)] < b)); + if(overflow_check) { m_overflow_limit = i; while(i < m) { b = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : tools::max_value(); - bn[static_cast(i)] = ((i % 2) ? b : -b); + bn[static_cast(i)] = ((i % 2U) ? b : -b); ++i; } break; @@ -401,13 +382,13 @@ public: // There are basically 3 thread safety options: // // 1) There are no threads (BOOST_HAS_THREADS is not defined). - // 2) There are threads, but we do not have a true atomic integer type, - // in this case we just use a mutex to guard against race conditions. + // 2) There are threads, but we do not have a true atomic integer type, + // in this case we just use a mutex to guard against race conditions. // 3) There are threads, and we have an atomic integer: in this case we can - // use the double-checked locking pattern to avoid thread synchronisation - // when accessing values already in the cache. + // use the double-checked locking pattern to avoid thread synchronisation + // when accessing values already in the cache. // - #if !defined(BOOST_HAS_THREADS) +#if !defined(BOOST_HAS_THREADS) // // Single threaded code, very simple: // @@ -422,7 +403,7 @@ public: *out = (i >= m_overflow_limit) ? policies::raise_overflow_error("boost::math::bernoulli<%1%>(std::size_t)", 0, pol) : bn[i]; ++out; } - #elif defined(BOOST_MATH_NO_ATOMIC_INT) +#elif defined(BOOST_MATH_NO_ATOMIC_INT) // // We need to grab a mutex every time we get here, for both readers and writers: // @@ -439,7 +420,7 @@ public: ++out; } - #else +#else // // Double-checked locking pattern, lets us access cached already cached values // without locking: @@ -467,7 +448,7 @@ public: ++out; } - #endif +#endif return out; } @@ -478,13 +459,13 @@ public: // There are basically 3 thread safety options: // // 1) There are no threads (BOOST_HAS_THREADS is not defined). - // 2) There are threads, but we do not have a true atomic integer type, - // in this case we just use a mutex to guard against race conditions. + // 2) There are threads, but we do not have a true atomic integer type, + // in this case we just use a mutex to guard against race conditions. // 3) There are threads, and we have an atomic integer: in this case we can - // use the double-checked locking pattern to avoid thread synchronisation - // when accessing values already in the cache. + // use the double-checked locking pattern to avoid thread synchronisation + // when accessing values already in the cache. // - #if !defined(BOOST_HAS_THREADS) +#if !defined(BOOST_HAS_THREADS) // // Single threaded code, very simple: // @@ -507,7 +488,7 @@ public: } ++out; } - #elif defined(BOOST_MATH_NO_ATOMIC_INT) +#elif defined(BOOST_MATH_NO_ATOMIC_INT) // // We need to grab a mutex every time we get here, for both readers and writers: // @@ -532,7 +513,7 @@ public: ++out; } - #else +#else // // Double-checked locking pattern, lets us access cached already cached values // without locking: @@ -568,7 +549,7 @@ public: ++out; } - #endif +#endif return out; } @@ -578,10 +559,14 @@ private: // these must NEVER EVER reallocate as it breaks our thread // safety guarentees: // - fixed_vector bn, tn; + container_type bn; + container_type tn; + std::vector m_intermediates; + // The value at which we know overflow has already occured for the Bn: std::size_t m_overflow_limit; + #if !defined(BOOST_HAS_THREADS) #elif defined(BOOST_MATH_NO_ATOMIC_INT) boost::detail::lightweight_mutex m_mutex; @@ -605,4 +590,4 @@ inline bernoulli_numbers_cache& get_bernoulli_numbers_cache() }}} -#endif // BOOST_MATH_BERNOULLI_DETAIL_HPP +#endif // BOOST_MATH_BERNOULLI_DETAILS_HPP diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp index 8453acb41..062d53b04 100644 --- a/include/boost/math/special_functions/gamma.hpp +++ b/include/boost/math/special_functions/gamma.hpp @@ -115,52 +115,25 @@ template std::size_t highest_bernoulli_index() { // Find the high index n for Bn to produce the desired precision in Stirling's calculation. - return static_cast(18.0F + (0.65F * static_cast(std::numeric_limits::digits10))); + return static_cast(18.0F + (0.6F * static_cast(std::numeric_limits::digits10))); } template T gamma_imp_bernoulli(T x, const Policy& pol) { - T result = 1; + BOOST_MATH_STD_USING - //the following handling of negative arguments modelled after the handling in function - //T gamma_imp(T z, const Policy& pol, const Lanczos& l) - if(x <= 0) + const bool b_neg = (x < 0); + + if(b_neg) { - static const char* function = "boost::math::tgamma<%1%>(%1%)"; - if(floor(x) == x) - return policies::raise_pole_error(function, "Evaluation of tgamma at a negative integer %1%.", x, pol); - - if(x <= -20) - { - result = gamma_imp_bernoulli(T(-x), pol) * sinpx(x); - BOOST_MATH_INSTRUMENT_VARIABLE(result); - if((fabs(result) < 1) && (tools::max_value() * fabs(result) < boost::math::constants::pi())) - return policies::raise_overflow_error(function, "Result of tgamma is too large to represent.", pol); - result = -boost::math::constants::pi() / result; - if(result == 0) - return policies::raise_underflow_error(function, "Result of tgamma is too small to represent.", pol); - if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL) - return policies::raise_denorm_error(function, "Result of tgamma is denormalized.", result, pol); - BOOST_MATH_INSTRUMENT_VARIABLE(result); - return result; - } - - while(x < 0) - { - result /= x; - - x += 1; - } + // TBD: Restore error handling. } // Make a local, unsigned copy of the input argument. - - T xx(x); - T original_x(xx); + T xx((!b_neg) ? x : -x); // Scale the argument up and use downward recursion later for the final result. - const T min_arg_for_recursion(static_cast(std::numeric_limits::digits10) * 1.7F); int n_recur; @@ -168,23 +141,20 @@ T gamma_imp_bernoulli(T x, const Policy& pol) if(xx < min_arg_for_recursion) { n_recur = boost::math::ltrunc(min_arg_for_recursion - xx) + 1; + + xx += n_recur; } else { n_recur = 0; } - if(n_recur != 0) - { - xx += n_recur; - } - const std::size_t number_of_bernoullis_b2n = highest_bernoulli_index(); std::vector bernoulli_table(number_of_bernoullis_b2n); boost::math::bernoulli_b2n(0, - static_cast(number_of_bernoullis_b2n), + static_cast(bernoulli_table.size()), bernoulli_table.begin()); T one_over_x_pow_two_n_minus_one = 1 / xx; @@ -210,22 +180,15 @@ T gamma_imp_bernoulli(T x, const Policy& pol) T gamma_value = exp(log_gamma_value); // Rescale the result using downward recursion if necessary. - if(n_recur != 0) + // Rescale the result using downward recursion if necessary. + for(int k = 0; k < n_recur; ++k) { - // We need to divide by every x+k in the range [x, x+n_recur), we save - // division by x till last, as we may have x < 1 which could cause - // spurious overflow if we divided by that first. - for(int k = 1; k < n_recur; k++) - { - gamma_value /= (original_x + k); - } - - gamma_value /= original_x; + xx -= 1; + gamma_value /= xx; } // Return the result, accounting for possible negative arguments. - - return result * gamma_value; + return ((!b_neg) ? gamma_value : -boost::math::constants::pi() / (xx * gamma_value * sin(boost::math::constants::pi() * xx))); } template