Continued work on tgamma() for multiprecision.

2026-01-28 19:32:08 +00:00 · 2014-01-02 00:08:21 +01:00
parent 825c2b97fd
commit 03e4dbc8c6
2 changed files with 166 additions and 218 deletions
--- a/include/boost/math/special_functions/detail/bernoulli_details.hpp
+++ b/include/boost/math/special_functions/detail/bernoulli_details.hpp
@@ -1,20 +1,21 @@
 ///////////////////////////////////////////////////////////////////////////////
-//  Copyright 2013 John Maddock
-//  Distributed under the Boost
-//  Software License, Version 1.0. (See accompanying file
-//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+// Copyright 2013 - 2014 John Maddock
+// Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

-#ifndef BOOST_MATH_BERNOULLI_DETAIL_HPP
-#define BOOST_MATH_BERNOULLI_DETAIL_HPP
+#ifndef BOOST_MATH_BERNOULLI_DETAILS_HPP
+#define BOOST_MATH_BERNOULLI_DETAILS_HPP

 #include <boost/config.hpp>
 #include <boost/detail/lightweight_mutex.hpp>
+#include <boost/math/tools/fixed_capacity_heap_vector.hpp>

 #ifdef BOOST_HAS_THREADS

 #ifndef BOOST_NO_CXX11_HDR_ATOMIC
-#  include <atomic>
-#  define BOOST_MATH_ATOMIC_NS std
+# include <atomic>
+# define BOOST_MATH_ATOMIC_NS std
 #if ATOMIC_INT_LOCK_FREE == 2
 typedef std::atomic<int> atomic_counter_type;
 #elif ATOMIC_SHORT_LOCK_FREE == 2
@@ -24,7 +25,7 @@ typedef std::atomic<long> atomic_counter_type;
 #elif ATOMIC_LLONG_LOCK_FREE == 2
 typedef std::atomic<long long> atomic_counter_type;
 #else
-#  define BOOST_MATH_NO_ATOMIC_INT
+# define BOOST_MATH_NO_ATOMIC_INT
 #endif

 #else // BOOST_NO_CXX11_HDR_ATOMIC
@@ -34,7 +35,7 @@ typedef std::atomic<long long> atomic_counter_type;
 //
 #define BOOST_ATOMIC_NO_LIB
 #include <boost/atomic.hpp>
-#  define BOOST_MATH_ATOMIC_NS boost
+# define BOOST_MATH_ATOMIC_NS boost

 namespace boost{ namespace math{ namespace detail{

@@ -50,12 +51,12 @@ typedef boost::atomic<long> atomic_counter_type;
 #elif BOOST_ATOMIC_LLONG_LOCK_FREE == 2
 typedef boost::atomic<long long> atomic_counter_type;
 #else
-#  define BOOST_MATH_NO_ATOMIC_INT
+# define BOOST_MATH_NO_ATOMIC_INT
 #endif

 }}} // namespaces

-#endif  // BOOST_NO_CXX11_HDR_ATOMIC
+#endif // BOOST_NO_CXX11_HDR_ATOMIC

 #endif // BOOST_HAS_THREADS

@@ -71,7 +72,7 @@ namespace boost{ namespace math{ namespace detail{
 template <class T>
 struct bernoulli_overflow_variant
 {
-   static const unsigned value = 
+   static const unsigned value =
      (std::numeric_limits<T>::max_exponent == 128)
      && (std::numeric_limits<T>::radix == 2) ? 1 :
      (
@@ -85,98 +86,120 @@ struct bernoulli_overflow_variant
 };

 template<class T>
-inline bool bernouli_impl_index_does_overflow(std::size_t, const mpl::int_<0>&)
+T bernouli_impl_index_might_overflow_function(const T& nx)
 {
-   return false;  // We have no idea, perform a runtime check
-}
-
-// There are certain cases for which the index n, when trying
-// to compute Bn, is known to overflow. In particular, when
-// using 32-bit float and 64-bit double (IEEE 754 conformant),
-// overflow will occur if the index exceeds the amount allowed
-// in the tables of Bn.
-template<class T>
-inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<1>&)
-{
-   // This corresponds to 4-byte float, IEEE 745 conformant.
-   return n >= max_bernoulli_index<1>::value * 2;
-}
-template<class T>
-inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<2>&)
-{
-   // This corresponds to 8-byte float, IEEE 745 conformant.
-   return n >= max_bernoulli_index<2>::value * 2;
-}
-template<class T>
-inline bool bernouli_impl_index_does_overflow(std::size_t n, const mpl::int_<3>&)
-{
-   // This corresponds to 16-byte float, IEEE 745 conformant.
-   return n >= max_bernoulli_index<3>::value * 2;
-}
-
-template<class T>
-inline bool bernouli_impl_index_does_overflow(std::size_t n)
-{
-   typedef mpl::int_<bernoulli_overflow_variant<T>::value> tag_type;
-   return bernouli_impl_index_does_overflow<T>(n, tag_type());
-}
-
-template<class T>
-bool bernouli_impl_index_might_overflow(std::size_t n)
-{
-   if(bernouli_impl_index_does_overflow<T>(n))
-   {
-      // If the index *does* overflow, then it also *might* overflow.
-      return true;
-   }
-
   BOOST_MATH_STD_USING

   // Luschny LogB3(n) formula.

-   const T nx (n);
   const T nx2(nx * nx);

   const T t_one(1);

   const T approximate_log_of_bernoulli_bn
-      =   ((boost::math::constants::half<T>() + nx) * log(nx))
+      = ((boost::math::constants::half<T>() + nx) * log(nx))
        + ((boost::math::constants::half<T>() - nx) * log(boost::math::constants::pi<T>()))
        + (((T(3) / 2) - nx) * boost::math::constants::ln_two<T>())
        + ((nx * (T(2) - (nx2 * 7) * (t_one + ((nx2 * 30) * ((nx2 * 12) - t_one))))) / (((nx2 * nx2) * nx2) * 2520));

-   return ((approximate_log_of_bernoulli_bn * 1.1F) > boost::math::tools::log_max_value<T>());
+   return (approximate_log_of_bernoulli_bn - boost::math::tools::log_max_value<T>());
+}
+
+template<class T>
+T bernouli_impl_index_might_overflow_tolerance(const T& min_value, const T& max_value)
+{
+   BOOST_MATH_STD_USING
+
+   return abs(max_value - min_value) < T(0.9F);
+}
+
+template<class T>
+inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<0>&)
+{
+   // We have no idea, perform a runtime check
+
+   BOOST_MATH_STD_USING
+
+   // Find the index of B2n that will overflow the numeric type.
+   // Use Luschny's order-3 approximation of the logarithm of
+   // Bernoulli numbers in combination with a root-finding tool
+   // from boost::math::tools for this.
+
+   boost::uintmax_t number_of_iterations = 128U;
+
+   const std::pair<T, T> index_might_overflow_root =
+      boost::math::tools::toms748_solve(bernouli_impl_index_might_overflow_function<T>,
+                                        T(20),
+                                        T((std::numeric_limits<std::size_t>::max)()),
+                                        bernouli_impl_index_might_overflow_tolerance<T>,
+                                        number_of_iterations);
+
+   // Compute the index that might overflow by averaging the bracketed
+   // values in the root pair.
+   const T index_might_overflow_value = (index_might_overflow_root.first + index_might_overflow_root.second) / 2;
+
+   // Determine a conservative estimate of the size_t integral value
+   // of the index that might. Do this by evaluating the binary exponent
+   // of the index that might overflow expressed as a floating-point number.
+   int p2;
+   frexp(index_might_overflow_value, &p2);
+
+   // Armed with the binary exponent, we left shift by an amount
+   // one less than this binary exponent such that the value of
+   // the index that might overflow is guessed conservatively.
+
+   const std::size_t index_might_overflow_conservative_value = static_cast<std::size_t>(static_cast<std::size_t>(1U) << (p2 - 1));
+
+   // Example: A floating-point numeric type with a 32-bit signed
+   // exponent overflows at an index of approximately 11,513,000.
+   // The result of the conservative value for the overflow index
+   // is approximately 8,389,000.
+
+   return index_might_overflow_conservative_value;
+}
+
+// There are certain cases for which the index n, when trying
+// to compute Bn, is known to overflow. In particular, when
+// using IEEE 754 conformant 32-bit float, 64-bit double,
+// or 128-bit quad, overflow will occur if the index exceeds
+// the amount allowed in the tables of Bn.
+template<class T>
+inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<1>&)
+{
+   // This corresponds to 4-byte float, IEEE 745 conformant.
+   return max_bernoulli_index<1>::value;
+}
+template<class T>
+inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<2>&)
+{
+   // This corresponds to 8-byte float, IEEE 745 conformant.
+   return max_bernoulli_index<2>::value;
+}
+template<class T>
+inline std::size_t bernouli_impl_index_might_overflow(const mpl::int_<3>&)
+{
+   // This corresponds to 16-byte float, IEEE 745 conformant.
+   return max_bernoulli_index<3>::value;
+}
+
+template<class T>
+inline std::size_t bernouli_impl_index_might_overflow()
+{
+   typedef mpl::int_<bernoulli_overflow_variant<T>::value> tag_type;
+
+   return bernouli_impl_index_might_overflow<T>(tag_type());
 }

 template<class T>
 std::size_t possible_overflow_index()
 {
-   // We use binary search to determine a good approximation for an index that might overflow.
+   // We use binary search to determine a good approximation for
+   // an index that might overflow.
   // This code is called ONCE ONLY for each T at program startup.

-   std::size_t upper_limit = (std::min)(static_cast<std::size_t>(boost::math::itrunc(tools::log_max_value<T>() / (2 * constants::ln_two<T>()) - 2)), static_cast<std::size_t>(10000u));
-   std::size_t lower_limit = 1;
+   const std::size_t the_possible_overflow_index = bernouli_impl_index_might_overflow<T>();

-   if(bernouli_impl_index_might_overflow<T>(upper_limit * 2) == 0)
-   {
-      return upper_limit;
-   }
-
-   while(upper_limit > (lower_limit + 4))
-   {
-      const int mid = (upper_limit + lower_limit) / 2;
-
-      if(bernouli_impl_index_might_overflow<T>(mid * 2) == 0)
-      {
-         lower_limit = mid;
-      }
-      else
-      {
-         upper_limit = mid;
-      }
-   }
-
-   return lower_limit;
+   return the_possible_overflow_index;
 }
 //
 // The tangent numbers grow larger much more rapidly than the Bernoulli numbers do....
@@ -226,68 +249,21 @@ struct bernoulli_initializer
 template <class T, class Policy>
 const typename bernoulli_initializer<T, Policy>::init bernoulli_initializer<T, Policy>::initializer;

-//
-// We need something to act as a cache for our calculated Bernoulli numbers.  In order to
-// ensure both fast access and thread safety, we need a stable table which may be extended
-// in size, but which never reallocates: that way values already calculated may be accessed
-// concurrently with another thread extending the table with new values.
-//
-// Very very simple vector class that will never allocate more than once, we could use
-// boost::container::static_vector here, but that allocates on the stack, which may well
-// cause issues for the amount of memory we want in the extreme case...
-//
-template <class T>
-struct fixed_vector : private std::allocator<T>
-{
-   typedef unsigned size_type;
-   typedef T* iterator;
-   typedef const T* const_iterator;
-   fixed_vector() : m_used(0)
-   { 
-      std::size_t overflow_limit = 100 + 3 * possible_overflow_index<T>();
-      m_capacity = static_cast<size_type>((std::min)(overflow_limit, static_cast<std::size_t>(100000u)));
-      m_data = this->allocate(m_capacity);
-   }
-   ~fixed_vector()
-   {
-      for(unsigned i = 0; i < m_used; ++i)
-         this->destroy(&m_data[i]);
-      this->deallocate(m_data, m_capacity);
-   }
-   T& operator[](unsigned n) { BOOST_ASSERT(n < m_used); return m_data[n]; }
-   const T& operator[](unsigned n)const { BOOST_ASSERT(n < m_used); return m_data[n]; }
-   unsigned size()const { return m_used; }
-   unsigned size() { return m_used; }
-   void resize(unsigned n, const T& val)
-   {
-      if(n > m_capacity)
-         throw std::runtime_error("Exhausted storage for Bernoulli numbers.");
-      for(unsigned i = m_used; i < n; ++i)
-         new (m_data + i) T(val);
-      m_used = n;
-   }
-   void resize(unsigned n) { resize(n, T()); }
-   T* begin() { return m_data; }
-   T* end() { return m_data + m_used; }
-   T* begin()const { return m_data; }
-   T* end()const { return m_data + m_used; }
-private:
-   T* m_data;
-   unsigned m_used, m_capacity;
-};
-
 template <class T, class Policy>
 class bernoulli_numbers_cache
 {
 public:
-   bernoulli_numbers_cache() : m_overflow_limit((std::numeric_limits<std::size_t>::max)())
+   typedef boost::math::tools::fixed_capacity_heap_vector<T> container_type;
+
+   bernoulli_numbers_cache() : bn              (static_cast<container_type::size_type>(100U + (2U * possible_overflow_index<T>()))),
+                               tn              (static_cast<container_type::size_type>(100U + (2U * possible_overflow_index<T>()))),
+                               m_overflow_limit((std::numeric_limits<std::size_t>::max)()),
+                               m_intermediates ()
 #if defined(BOOST_HAS_THREADS) && !defined(BOOST_MATH_NO_ATOMIC_INT)
      , m_counter(0)
 #endif
   {}

-   typedef fixed_vector<T> container_type;
-
   void tangent(std::size_t m)
   {
      static const std::size_t min_overflow_index = possible_overflow_index<T>();
@@ -310,32 +286,34 @@ public:
      for(std::size_t i = std::max<size_t>(2, prev_size); i < m; i++)
      {
         bool overflow_check = false;
+
         if(i >= min_overflow_index && (boost::math::tools::max_value<T>() / (i-1) < m_intermediates[1]) )
         {
            std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value<T>());
            break;
         }
+
         m_intermediates[1] = m_intermediates[1] * (i-1);
-         for(std::size_t j = 2; j <= i; j++)
+
+         for(std::size_t j = 2U; j <= i; ++j)
         {
-            overflow_check =
-                  (i >= min_overflow_index) && (
-                  (boost::math::tools::max_value<T>() / (i - j) < m_intermediates[j])
-                  || (boost::math::tools::max_value<T>() / (i - j + 2) < m_intermediates[j-1])
-                  || (boost::math::tools::max_value<T>() - m_intermediates[j] * (i - j) < m_intermediates[j-1] * (i - j + 2))
-                  || ((boost::math::isinf)(m_intermediates[j]))
-                );
+            overflow_check = (   (i >= min_overflow_index)
+                              && (boost::math::tools::max_value<T>() / (i - j) < m_intermediates[j]));

            if(overflow_check)
            {
               std::fill(tn.begin() + i, tn.end(), boost::math::tools::max_value<T>());
               break;
            }
-            m_intermediates[j] = m_intermediates[j] * (i - j) + m_intermediates[j-1] * (i - j + 2);
+
+            m_intermediates[j] = (m_intermediates[j] * (i - j)) + (m_intermediates[j - 1] * (i - j + 2));
         }
+
         if(overflow_check)
            break; // already filled the tn...
+
         tn[static_cast<container_type::size_type>(i)] = m_intermediates[i];
+
         BOOST_MATH_INSTRUMENT_VARIABLE(i);
         BOOST_MATH_INSTRUMENT_VARIABLE(tn[i]);
      }
@@ -367,16 +345,19 @@ public:
         // the calculation, but we also need to avoid overflow altogether in case
         // we're calculating with a type where "bad things" happen in that case:
         //
-         b  = b / (power_two * tangent_scale_factor<T>());
+         b = b / (power_two * tangent_scale_factor<T>());
         b /= (power_two - 1);
-         bool overflow_check = (i >= min_overflow_index) && (tools::max_value<T>() / tn[static_cast<container_type::size_type>(i)] < b);
+
+         const bool overflow_check = (   (i >= min_overflow_index)
+                                      && (tools::max_value<T>() / tn[static_cast<container_type::size_type>(i)] < b));
+
         if(overflow_check)
         {
            m_overflow_limit = i;
            while(i < m)
            {
               b = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : tools::max_value<T>();
-               bn[static_cast<container_type::size_type>(i)] = ((i % 2) ? b : -b);
+               bn[static_cast<container_type::size_type>(i)] = ((i % 2U) ? b : -b);
               ++i;
            }
            break;
@@ -401,13 +382,13 @@ public:
      // There are basically 3 thread safety options:
      //
      // 1) There are no threads (BOOST_HAS_THREADS is not defined).
-      // 2) There are threads, but we do not have a true atomic integer type, 
-      //    in this case we just use a mutex to guard against race conditions.
+      // 2) There are threads, but we do not have a true atomic integer type,
+      // in this case we just use a mutex to guard against race conditions.
      // 3) There are threads, and we have an atomic integer: in this case we can
-      //    use the double-checked locking pattern to avoid thread synchronisation
-      //    when accessing values already in the cache.
+      // use the double-checked locking pattern to avoid thread synchronisation
+      // when accessing values already in the cache.
      //
-   #if !defined(BOOST_HAS_THREADS)
+#if !defined(BOOST_HAS_THREADS)
      //
      // Single threaded code, very simple:
      //
@@ -422,7 +403,7 @@ public:
         *out = (i >= m_overflow_limit) ? policies::raise_overflow_error<T>("boost::math::bernoulli<%1%>(std::size_t)", 0, pol) : bn[i];
         ++out;
      }
-   #elif defined(BOOST_MATH_NO_ATOMIC_INT)
+#elif defined(BOOST_MATH_NO_ATOMIC_INT)
      //
      // We need to grab a mutex every time we get here, for both readers and writers:
      //
@@ -439,7 +420,7 @@ public:
         ++out;
      }

-   #else
+#else
      //
      // Double-checked locking pattern, lets us access cached already cached values
      // without locking:
@@ -467,7 +448,7 @@ public:
         ++out;
      }

-   #endif
+#endif
      return out;
   }

@@ -478,13 +459,13 @@ public:
      // There are basically 3 thread safety options:
      //
      // 1) There are no threads (BOOST_HAS_THREADS is not defined).
-      // 2) There are threads, but we do not have a true atomic integer type, 
-      //    in this case we just use a mutex to guard against race conditions.
+      // 2) There are threads, but we do not have a true atomic integer type,
+      // in this case we just use a mutex to guard against race conditions.
      // 3) There are threads, and we have an atomic integer: in this case we can
-      //    use the double-checked locking pattern to avoid thread synchronisation
-      //    when accessing values already in the cache.
+      // use the double-checked locking pattern to avoid thread synchronisation
+      // when accessing values already in the cache.
      //
-   #if !defined(BOOST_HAS_THREADS)
+#if !defined(BOOST_HAS_THREADS)
      //
      // Single threaded code, very simple:
      //
@@ -507,7 +488,7 @@ public:
         }
         ++out;
      }
-   #elif defined(BOOST_MATH_NO_ATOMIC_INT)
+#elif defined(BOOST_MATH_NO_ATOMIC_INT)
      //
      // We need to grab a mutex every time we get here, for both readers and writers:
      //
@@ -532,7 +513,7 @@ public:
         ++out;
      }

-   #else
+#else
      //
      // Double-checked locking pattern, lets us access cached already cached values
      // without locking:
@@ -568,7 +549,7 @@ public:
         ++out;
      }

-   #endif
+#endif
      return out;
   }

@@ -578,10 +559,14 @@ private:
   // these must NEVER EVER reallocate as it breaks our thread
   // safety guarentees:
   //
-   fixed_vector<T> bn, tn;
+   container_type bn;
+   container_type tn;
+
   std::vector<T> m_intermediates;
+
   // The value at which we know overflow has already occured for the Bn:
   std::size_t m_overflow_limit;
+
 #if !defined(BOOST_HAS_THREADS)
 #elif defined(BOOST_MATH_NO_ATOMIC_INT)
   boost::detail::lightweight_mutex m_mutex;
@@ -605,4 +590,4 @@ inline bernoulli_numbers_cache<T, Policy>& get_bernoulli_numbers_cache()

 }}}

-#endif // BOOST_MATH_BERNOULLI_DETAIL_HPP
+#endif // BOOST_MATH_BERNOULLI_DETAILS_HPP
--- a/include/boost/math/special_functions/gamma.hpp
+++ b/include/boost/math/special_functions/gamma.hpp
@@ -115,52 +115,25 @@ template<class T>
 std::size_t highest_bernoulli_index()
 {
   // Find the high index n for Bn to produce the desired precision in Stirling's calculation.
-   return static_cast<std::size_t>(18.0F + (0.65F * static_cast<float>(std::numeric_limits<T>::digits10)));
+   return static_cast<std::size_t>(18.0F + (0.6F * static_cast<float>(std::numeric_limits<T>::digits10)));
 }

 template <class T, class Policy>
 T gamma_imp_bernoulli(T x, const Policy& pol)
 {
-   T result = 1;
+   BOOST_MATH_STD_USING

-   //the following handling of negative arguments modelled after the handling in function
-   //T gamma_imp(T z, const Policy& pol, const Lanczos& l)
-   if(x <= 0)
+   const bool b_neg = (x < 0);
+
+   if(b_neg)
   {
-      static const char* function = "boost::math::tgamma<%1%>(%1%)";
-      if(floor(x) == x)
-         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", x, pol);
-
-      if(x <= -20)
-      {
-         result = gamma_imp_bernoulli(T(-x), pol) * sinpx(x);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
-            return policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
-         result = -boost::math::constants::pi<T>() / result;
-         if(result == 0)
-            return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
-         if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL)
-            return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         return result;
-      }
-
-      while(x < 0)
-      {
-         result /= x;
-
-         x += 1;
-      }
+      // TBD: Restore error handling.
   }

   // Make a local, unsigned copy of the input argument.
-
-   T xx(x);
-   T original_x(xx);
+   T xx((!b_neg) ? x : -x);

   // Scale the argument up and use downward recursion later for the final result.
-
   const T min_arg_for_recursion(static_cast<float>(std::numeric_limits<T>::digits10) * 1.7F);

   int n_recur;
@@ -168,23 +141,20 @@ T gamma_imp_bernoulli(T x, const Policy& pol)
   if(xx < min_arg_for_recursion)
   {
      n_recur = boost::math::ltrunc(min_arg_for_recursion - xx) + 1;
+
+      xx += n_recur;
   }
   else
   {
      n_recur = 0;
   }

-   if(n_recur != 0)
-   {
-     xx += n_recur;
-   }
-
   const std::size_t number_of_bernoullis_b2n = highest_bernoulli_index<T>();

   std::vector<T> bernoulli_table(number_of_bernoullis_b2n);

   boost::math::bernoulli_b2n<T>(0,
-                                 static_cast<unsigned>(number_of_bernoullis_b2n),
+                                 static_cast<unsigned>(bernoulli_table.size()),
                                 bernoulli_table.begin());

         T one_over_x_pow_two_n_minus_one = 1 / xx;
@@ -210,22 +180,15 @@ T gamma_imp_bernoulli(T x, const Policy& pol)
   T gamma_value = exp(log_gamma_value);

   // Rescale the result using downward recursion if necessary.
-   if(n_recur != 0)
+   // Rescale the result using downward recursion if necessary.
+   for(int k = 0; k < n_recur; ++k)
   {
-      // We need to divide by every x+k in the range [x, x+n_recur), we save
-      // division by x till last, as we may have x < 1 which could cause
-      // spurious overflow if we divided by that first.
-      for(int k = 1; k < n_recur; k++)
-      {
-         gamma_value /= (original_x + k);
-      }
-
-      gamma_value /= original_x;
+      xx -= 1;
+      gamma_value /= xx;
   }

   // Return the result, accounting for possible negative arguments.
-
-   return result * gamma_value;
+   return ((!b_neg) ? gamma_value : -boost::math::constants::pi<T>() / (xx * gamma_value * sin(boost::math::constants::pi<T>() * xx)));
 }

 template<class T, class Policy>