From 9e6f2b1b401c28a7eed0be4aa816027646a14a4b Mon Sep 17 00:00:00 2001
From: jzmaddock <john@johnmaddock.co.uk>
Date: Fri, 31 May 2024 11:54:52 +0100
Subject: [PATCH] Update Non central T PDF: Lot's more tests, especially in the
 tails. Added Hypergeometric and Integration methods as fallbacks.

---
 .../math/distributions/non_central_beta.hpp   |  17 ++-
 .../math/distributions/non_central_t.hpp      | 107 +++++++++++++++---
 test/test_nc_t.cpp                            |  14 +++
 test/test_nc_t.hpp                            |  30 +++++
 tools/non_central_t_pdf_data.cpp              |  92 +++++++++++++++
 5 files changed, 242 insertions(+), 18 deletions(-)
 create mode 100644 tools/non_central_t_pdf_data.cpp

diff --git a/include/boost/math/distributions/non_central_beta.hpp b/include/boost/math/distributions/non_central_beta.hpp
index 592ac1951..66b12e870 100644
--- a/include/boost/math/distributions/non_central_beta.hpp
+++ b/include/boost/math/distributions/non_central_beta.hpp
@@ -91,7 +91,7 @@ namespace boost
             {
                T term = beta * pois;
                sum += term;
-               if(((fabs(term/sum) < errtol) && (last_term >= term)) || (term == 0))
+               if(((fabs(term/sum) < errtol) && (fabs(last_term) >= fabs(term))) || (term == 0))
                {
                   count = k - i;
                   break;
@@ -106,6 +106,7 @@ namespace boost
                
                last_term = term;
             }
+            last_term = 0;
             for(auto i = k + 1; ; ++i)
             {
                poisf *= l2 / i;
@@ -114,10 +115,11 @@ namespace boost
 
                T term = poisf * betaf;
                sum += term;
-               if((fabs(term/sum) < errtol) || (term == 0))
+               if(((fabs(term/sum) < errtol) && (fabs(last_term) >= fabs(term))) || (term == 0))
                {
                   break;
                }
+               last_term = term;
                if(static_cast<std::uintmax_t>(count + i - k) > max_iter)
                {
                   return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
@@ -579,15 +581,19 @@ namespace boost
             // Stable backwards recursion first:
             //
             std::uintmax_t count = k;
+            T ratio = 0;
+            T old_ratio = 0;
             for(auto i = k; i >= 0; --i)
             {
                T term = beta * pois;
                sum += term;
-               if((fabs(term/sum) < errtol) || (term == 0))
+               ratio = fabs(term / sum);
+               if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0))
                {
                   count = k - i;
                   break;
                }
+               ratio = old_ratio;
                pois *= i / l2;
 
                if (a + b + i != 1)
@@ -595,6 +601,7 @@ namespace boost
                   beta *= (a + i - 1) / (x * (a + i + b - 1));
                }
             }
+            old_ratio = 0;
             for(auto i = k + 1; ; ++i)
             {
                poisf *= l2 / i;
@@ -602,10 +609,12 @@ namespace boost
 
                T term = poisf * betaf;
                sum += term;
-               if((fabs(term/sum) < errtol) || (term == 0))
+               ratio = fabs(term / sum);
+               if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0))
                {
                   break;
                }
+               old_ratio = ratio;
                if(static_cast<std::uintmax_t>(count + i - k) > max_iter)
                {
                   return policies::raise_evaluation_error("pdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
diff --git a/include/boost/math/distributions/non_central_t.hpp b/include/boost/math/distributions/non_central_t.hpp
index ca3e9d53f..d3d58cd20 100644
--- a/include/boost/math/distributions/non_central_t.hpp
+++ b/include/boost/math/distributions/non_central_t.hpp
@@ -16,6 +16,8 @@
 #include <boost/math/distributions/students_t.hpp>
 #include <boost/math/distributions/detail/generic_quantile.hpp> // quantile
 #include <boost/math/special_functions/trunc.hpp>
+#include <boost/math/special_functions/detail/hypergeometric_series.hpp>
+#include <boost/math/quadrature/exp_sinh.hpp>
 
 namespace boost
 {
@@ -390,6 +392,60 @@ namespace boost
                function);
          }
 
+         template <class T, class Policy>
+         T non_central_t_pdf_integral(T x, T v, T mu, const Policy& pol)
+         {
+            BOOST_MATH_STD_USING
+            boost::math::quadrature::exp_sinh<T, Policy> integrator;
+            T integral = pow(v, v / 2) * exp(-v * mu * mu / (2 * (x * x + v)));
+            if (integral != 0)
+            {
+               integral *= integrator.integrate([&x, v, mu](T y) 
+                  {
+                     T p;
+                     if (v * log(y) < tools::log_max_value<T>())
+                        p = pow(y, v) * exp(boost::math::pow<2>((y - mu * x / sqrt(x * x + v))) / -2);
+                     else
+                        p = exp(log(y) * v + boost::math::pow<2>((y - mu * x / sqrt(x * x + v))) / -2);
+                     return p; 
+                  });
+            }
+            integral /= boost::math::constants::root_pi<T>() * boost::math::tgamma(v / 2, pol) * pow(T(2), (v - 1) / 2) * pow(x * x + v, (v + 1) / 2);
+            return integral;
+         }
+
+         template <class T, class Policy>
+         T non_central_t_pdf_hypergeometric(T x, T v, T mu, const Policy& pol)
+         {
+            BOOST_MATH_STD_USING
+            long long scale = 0;
+            const char* function = "non central T PDF";
+            //
+            // We only call this routine when we know that the series form of 1F1 is cheap to evaluate,
+            // so no need to call the whole 1F1 function, just the series will do:
+            //
+            T Av = hypergeometric_1F1_generic_series((v + 1) / 2, boost::math::constants::half<T>(), mu * mu * x * x / (2 * (x * x + v)), pol, scale, function);
+            Av = ldexp(Av, static_cast<int>(scale));
+            scale = 0;
+            T Bv = hypergeometric_1F1_generic_series(v / 2 + T(1), T(3) / 2, mu * mu * x * x / (2 * (x * x + v)), pol, scale, function);
+            Bv = ldexp(Bv, static_cast<int>(scale));
+            Bv *= boost::math::tgamma_delta_ratio(v / 2 + T(1), -constants::half<T>(), pol);
+            Bv *= boost::math::constants::root_two<T>() * mu * x / sqrt(x * x + v);
+
+            T tolerance = tools::root_epsilon<T>() * Av * 4;
+            Av += Bv;
+
+            if (Av < tolerance)
+            {
+               // More than half the digits have cancelled, fall back to the integral method:
+               return non_central_t_pdf_integral(x, v, mu, pol);
+            }
+
+            Av *= exp(-mu * mu / 2) * pow(1 + x * x / v, -(v + 1) / 2) * boost::math::tgamma_delta_ratio(v / 2 + constants::half<T>(), -constants::half<T>(), pol);
+            Av /= sqrt(v) * boost::math::constants::root_pi<T>();
+            return Av;
+         }
+
          template <class T, class Policy>
          T non_central_t2_pdf(T n, T delta, T x, T y, const Policy& pol, T init_val)
          {
@@ -462,15 +518,18 @@ namespace boost
                }
             }
             BOOST_MATH_INSTRUMENT_VARIABLE(sum);
+            old_ratio = 0;
             for(auto i = k + 1; ; ++i)
             {
                poisf *= d2 / (i + 0.5f);
                xtermf *= (x * (n / 2 + i)) / (i);
                T term = poisf * xtermf;
                sum += term;
-               if((fabs(term/sum) < errtol) || (term == 0))
+               T ratio = fabs(term / sum);
+               if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0))
                   break;
                ++count;
+               old_ratio = ratio;
                if(count > max_iter)
                {
                   return policies::raise_evaluation_error("pdf(non_central_t_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
@@ -489,14 +548,7 @@ namespace boost
                normal_distribution<T, Policy> norm(delta, 1);
                return pdf(norm, t);
             }
-            //
-            // Otherwise, for t < 0 we have to use the reflection formula:
-            if(t < 0)
-            {
-               t = -t;
-               delta = -delta;
-            }
-            if(t * t == 0)
+            if(t * t < tools::epsilon<T>())
             {
                //
                // Handle this as a special case, using the formula
@@ -527,13 +579,30 @@ namespace boost
                return pdf(students_t_distribution<T, Policy>(n), t - delta);
             }
             //
+            // Figure out if the hypergeometric formula will be efficient or not, 
+            // based on where the summit of the series.
+            //
+            T a = (n + 1) / 2;
+            T x = delta * delta * t * t / (2 * (t * t + n));
+            T summit = (sqrt(x * (4 * a + x)) + x) / 2;
+            if (summit < 40)
+               return non_central_t_pdf_hypergeometric(t, n, delta, pol);
+            // 
+            // Otherwise, for t < 0 we have to use the reflection formula:
+            //
+            if (t < 0)
+            {
+               t = -t;
+               delta = -delta;
+            }
+            //
             // x and y are the corresponding random
             // variables for the noncentral beta distribution,
             // with y = 1 - x:
             //
-            T x = t * t / (n + t * t);
+            x = t * t / (n + t * t);
             T y = n / (n + t * t);
-            T a = 0.5f;
+            a = 0.5f;
             T b = n / 2;
             T d2 = delta * delta;
             //
@@ -543,12 +612,22 @@ namespace boost
             BOOST_MATH_INSTRUMENT_VARIABLE(dt);
             T result = non_central_beta_pdf(a, b, d2, x, y, pol);
             BOOST_MATH_INSTRUMENT_VARIABLE(result);
-            T tol = tools::epsilon<T>() * result * 500;
+            T tol = tools::root_epsilon<T>() * result;
             result = non_central_t2_pdf(n, delta, x, y, pol, result);
             BOOST_MATH_INSTRUMENT_VARIABLE(result);
-            if(result <= tol)
-               result = 0;
             result *= dt;
+            if (result <= tol)
+            {
+               // More than half the digits in the result have cancelled, 
+               // Try direct integration... super slow but reliable as far as we can tell...
+               if (delta < 0)
+               {
+                  // reflect back:
+                  delta = -delta;
+                  t = -t;
+               }
+               result = non_central_t_pdf_integral(t, n, delta, pol);
+            }
             return result;
          }
 
diff --git a/test/test_nc_t.cpp b/test/test_nc_t.cpp
index d9a8d4ed7..8a12635de 100644
--- a/test/test_nc_t.cpp
+++ b/test/test_nc_t.cpp
@@ -113,6 +113,20 @@ void expected_results()
       largest_type,                     // test type(s)
       "[^|]*",                          // test data group
       "[^|]*", 400, 100);               // test function
+   add_expected_result(
+      "[^|]*",                          // compiler
+      "[^|]*",                          // stdlib
+      "[^|]*",                          // platform
+      "double",                         // test type(s)
+      "[^|]*PDF",                  // test data group
+      "[^|]*", static_cast<std::uintmax_t>(1 / boost::math::tools::root_epsilon<double>()), static_cast<std::uintmax_t>(1 / boost::math::tools::root_epsilon<double>())); // test function
+   add_expected_result(
+      "[^|]*",                          // compiler
+      "[^|]*",                          // stdlib
+      "[^|]*",                          // platform
+      "long double",                         // test type(s)
+      "[^|]*PDF",                  // test data group
+      "[^|]*", static_cast<std::uintmax_t>(1 / boost::math::tools::root_epsilon<long double>()), static_cast<std::uintmax_t>(1 / boost::math::tools::root_epsilon<long double>())); // test function
    add_expected_result(
       "[^|]*",                          // compiler
       "[^|]*",                          // stdlib
diff --git a/test/test_nc_t.hpp b/test/test_nc_t.hpp
index b4fd17fb7..65af7e647 100644
--- a/test/test_nc_t.hpp
+++ b/test/test_nc_t.hpp
@@ -353,6 +353,12 @@ T nct_cdf(T df, T nc, T x)
    return cdf(boost::math::non_central_t_distribution<T>(df, nc), x);
 }
 
+template <class T>
+T nct_pdf(T df, T nc, T x)
+{
+   return pdf(boost::math::non_central_t_distribution<T>(df, nc), x);
+}
+
 template <class T>
 T nct_ccdf(T df, T nc, T x)
 {
@@ -399,6 +405,27 @@ void do_test_nc_t(T& data, const char* type_name, const char* test)
 #endif
 }
 
+template <typename Real, typename T>
+void do_test_nc_t_pdf(T& data, const char* type_name, const char* test)
+{
+   typedef Real                   value_type;
+
+   std::cout << "Testing: " << test << std::endl;
+
+   value_type(*fp1)(value_type, value_type, value_type) = nct_pdf;
+
+   boost::math::tools::test_result<value_type> result;
+
+   result = boost::math::tools::test_hetero<Real>(
+      data,
+      bind_func<Real>(fp1, 0, 1, 2),
+      extract_result<Real>(3));
+   handle_test_result(result, data[result.worst()], result.worst(),
+      type_name, "non central t PDF", test);
+
+   std::cout << std::endl;
+}
+
 template <typename Real, typename T>
 void quantile_sanity_check(T& data, const char* type_name, const char* test)
 {
@@ -522,6 +549,9 @@ void test_accuracy(T, const char* type_name)
 #include "nct_asym.ipp"
       do_test_nc_t<T>(nct_asym, type_name, "Non Central T (large parameters)");
       quantile_sanity_check<T>(nct_asym, type_name, "Non Central T (large parameters)");
+
+#include "nc_t_pdf_data.ipp"
+      do_test_nc_t_pdf<T>(nc_t_pdf_data, type_name, "Non Central T PDF");
    }
 }
 
diff --git a/tools/non_central_t_pdf_data.cpp b/tools/non_central_t_pdf_data.cpp
new file mode 100644
index 000000000..7d50f0080
--- /dev/null
+++ b/tools/non_central_t_pdf_data.cpp
@@ -0,0 +1,92 @@
+//  (C) Copyright John Maddock 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_MAX_SERIES_ITERATION_POLICY 10000000
+#define BOOST_MATH_USE_MPFR
+
+#include "mp_t.hpp"
+#include <boost/math/special_functions/hypergeometric_1F1.hpp>
+#include <boost/math/distributions/non_central_t.hpp>
+#include <boost/lexical_cast.hpp>
+#include <fstream>
+#include <map>
+#include <boost/math/tools/test_data.hpp>
+#include <boost/random.hpp>
+
+using namespace boost::math::tools;
+using namespace boost::math;
+using namespace std;
+
+struct nc_t_pdf_gen
+{
+   mp_t operator()(mp_t v, mp_t mu, mp_t x)
+   {
+      //
+      // Arbitrary smallest value we accept, below this, we will likely underflow to zero
+      // when computing at double precision:
+      //
+      static const mp_t minimum = 1e-270;
+
+      mp_t Av = boost::math::hypergeometric_1F1((v + 1) / 2, boost::math::constants::half<mp_t>(), mu * mu * x * x / (2 * (x * x + v)));
+      mp_t Bv = boost::math::hypergeometric_1F1(v / 2 + mp_t(1), mp_t(3) / 2, mu * mu * x * x / (2 * (x * x + v)));
+      Bv *= boost::math::tgamma_ratio(v / 2 + mp_t(1), (v + mp_t(1)) / 2);
+      Bv *= boost::math::constants::root_two<mp_t>() * mu * x / sqrt(x * x + v);
+
+      Av += Bv;
+      Av *= exp(-mu * mu / 2) * pow(1 + x * x / v, -(v + 1) / 2) * boost::math::tgamma_ratio((v + mp_t(1)) / 2, v / 2);
+      Av /= sqrt(v) * boost::math::constants::root_pi<mp_t>();
+
+      if (Av < minimum)
+         throw std::domain_error("Result too small!");
+
+      return Av;
+   }
+};
+
+
+int main(int, char* [])
+{
+   parameter_info<mp_t> arg1, arg2, arg3;
+   test_data<mp_t> data;
+
+   std::cout << "Welcome.\n"
+      "This program will generate spot tests for the non central t PDF:\n";
+
+   std::string line;
+   bool cont;
+
+   random_ns::mt19937 rnd;
+   random_ns::uniform_real_distribution<float> ur_a(0, 20);
+
+
+   do
+   {
+      if (0 == get_user_parameter_info(arg1, "v"))
+         return 1;
+      if (0 == get_user_parameter_info(arg2, "nc"))
+         return 1;
+
+      arg3 = make_periodic_param(mp_t(-20), mp_t(200), 170);
+
+      data.insert(nc_t_pdf_gen(), arg1, arg2, arg3);
+      std::cout << "Any more data [y/n]?";
+      std::getline(std::cin, line);
+      boost::algorithm::trim(line);
+      cont = (line == "y");
+   } while (cont);
+
+   std::cout << "Enter name of test data file [default=nc_t_pdf_data.ipp]";
+   std::getline(std::cin, line);
+   boost::algorithm::trim(line);
+   if (line == "")
+      line = "nc_t_pdf_data.ipp";
+   std::ofstream ofs(line.c_str());
+   ofs << std::scientific << std::setprecision(40);
+   write_code(ofs, data, line.c_str());
+
+   return 0;
+}
+
+