From 9e6f2b1b401c28a7eed0be4aa816027646a14a4b Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Fri, 31 May 2024 11:54:52 +0100 Subject: [PATCH] Update Non central T PDF: Lot's more tests, especially in the tails. Added Hypergeometric and Integration methods as fallbacks. --- .../math/distributions/non_central_beta.hpp | 17 ++- .../math/distributions/non_central_t.hpp | 107 +++++++++++++++--- test/test_nc_t.cpp | 14 +++ test/test_nc_t.hpp | 30 +++++ tools/non_central_t_pdf_data.cpp | 92 +++++++++++++++ 5 files changed, 242 insertions(+), 18 deletions(-) create mode 100644 tools/non_central_t_pdf_data.cpp diff --git a/include/boost/math/distributions/non_central_beta.hpp b/include/boost/math/distributions/non_central_beta.hpp index 592ac1951..66b12e870 100644 --- a/include/boost/math/distributions/non_central_beta.hpp +++ b/include/boost/math/distributions/non_central_beta.hpp @@ -91,7 +91,7 @@ namespace boost { T term = beta * pois; sum += term; - if(((fabs(term/sum) < errtol) && (last_term >= term)) || (term == 0)) + if(((fabs(term/sum) < errtol) && (fabs(last_term) >= fabs(term))) || (term == 0)) { count = k - i; break; @@ -106,6 +106,7 @@ namespace boost last_term = term; } + last_term = 0; for(auto i = k + 1; ; ++i) { poisf *= l2 / i; @@ -114,10 +115,11 @@ namespace boost T term = poisf * betaf; sum += term; - if((fabs(term/sum) < errtol) || (term == 0)) + if(((fabs(term/sum) < errtol) && (fabs(last_term) >= fabs(term))) || (term == 0)) { break; } + last_term = term; if(static_cast(count + i - k) > max_iter) { return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE @@ -579,15 +581,19 @@ namespace boost // Stable backwards recursion first: // std::uintmax_t count = k; + T ratio = 0; + T old_ratio = 0; for(auto i = k; i >= 0; --i) { T term = beta * pois; sum += term; - if((fabs(term/sum) < errtol) || (term == 0)) + ratio = fabs(term / sum); + if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0)) { count = k - i; break; } + ratio = old_ratio; pois *= i / l2; if (a + b + i != 1) @@ -595,6 +601,7 @@ namespace boost beta *= (a + i - 1) / (x * (a + i + b - 1)); } } + old_ratio = 0; for(auto i = k + 1; ; ++i) { poisf *= l2 / i; @@ -602,10 +609,12 @@ namespace boost T term = poisf * betaf; sum += term; - if((fabs(term/sum) < errtol) || (term == 0)) + ratio = fabs(term / sum); + if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0)) { break; } + old_ratio = ratio; if(static_cast(count + i - k) > max_iter) { return policies::raise_evaluation_error("pdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE diff --git a/include/boost/math/distributions/non_central_t.hpp b/include/boost/math/distributions/non_central_t.hpp index ca3e9d53f..d3d58cd20 100644 --- a/include/boost/math/distributions/non_central_t.hpp +++ b/include/boost/math/distributions/non_central_t.hpp @@ -16,6 +16,8 @@ #include #include // quantile #include +#include +#include namespace boost { @@ -390,6 +392,60 @@ namespace boost function); } + template + T non_central_t_pdf_integral(T x, T v, T mu, const Policy& pol) + { + BOOST_MATH_STD_USING + boost::math::quadrature::exp_sinh integrator; + T integral = pow(v, v / 2) * exp(-v * mu * mu / (2 * (x * x + v))); + if (integral != 0) + { + integral *= integrator.integrate([&x, v, mu](T y) + { + T p; + if (v * log(y) < tools::log_max_value()) + p = pow(y, v) * exp(boost::math::pow<2>((y - mu * x / sqrt(x * x + v))) / -2); + else + p = exp(log(y) * v + boost::math::pow<2>((y - mu * x / sqrt(x * x + v))) / -2); + return p; + }); + } + integral /= boost::math::constants::root_pi() * boost::math::tgamma(v / 2, pol) * pow(T(2), (v - 1) / 2) * pow(x * x + v, (v + 1) / 2); + return integral; + } + + template + T non_central_t_pdf_hypergeometric(T x, T v, T mu, const Policy& pol) + { + BOOST_MATH_STD_USING + long long scale = 0; + const char* function = "non central T PDF"; + // + // We only call this routine when we know that the series form of 1F1 is cheap to evaluate, + // so no need to call the whole 1F1 function, just the series will do: + // + T Av = hypergeometric_1F1_generic_series((v + 1) / 2, boost::math::constants::half(), mu * mu * x * x / (2 * (x * x + v)), pol, scale, function); + Av = ldexp(Av, static_cast(scale)); + scale = 0; + T Bv = hypergeometric_1F1_generic_series(v / 2 + T(1), T(3) / 2, mu * mu * x * x / (2 * (x * x + v)), pol, scale, function); + Bv = ldexp(Bv, static_cast(scale)); + Bv *= boost::math::tgamma_delta_ratio(v / 2 + T(1), -constants::half(), pol); + Bv *= boost::math::constants::root_two() * mu * x / sqrt(x * x + v); + + T tolerance = tools::root_epsilon() * Av * 4; + Av += Bv; + + if (Av < tolerance) + { + // More than half the digits have cancelled, fall back to the integral method: + return non_central_t_pdf_integral(x, v, mu, pol); + } + + Av *= exp(-mu * mu / 2) * pow(1 + x * x / v, -(v + 1) / 2) * boost::math::tgamma_delta_ratio(v / 2 + constants::half(), -constants::half(), pol); + Av /= sqrt(v) * boost::math::constants::root_pi(); + return Av; + } + template T non_central_t2_pdf(T n, T delta, T x, T y, const Policy& pol, T init_val) { @@ -462,15 +518,18 @@ namespace boost } } BOOST_MATH_INSTRUMENT_VARIABLE(sum); + old_ratio = 0; for(auto i = k + 1; ; ++i) { poisf *= d2 / (i + 0.5f); xtermf *= (x * (n / 2 + i)) / (i); T term = poisf * xtermf; sum += term; - if((fabs(term/sum) < errtol) || (term == 0)) + T ratio = fabs(term / sum); + if(((ratio < errtol) && (ratio < old_ratio)) || (term == 0)) break; ++count; + old_ratio = ratio; if(count > max_iter) { return policies::raise_evaluation_error("pdf(non_central_t_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE @@ -489,14 +548,7 @@ namespace boost normal_distribution norm(delta, 1); return pdf(norm, t); } - // - // Otherwise, for t < 0 we have to use the reflection formula: - if(t < 0) - { - t = -t; - delta = -delta; - } - if(t * t == 0) + if(t * t < tools::epsilon()) { // // Handle this as a special case, using the formula @@ -527,13 +579,30 @@ namespace boost return pdf(students_t_distribution(n), t - delta); } // + // Figure out if the hypergeometric formula will be efficient or not, + // based on where the summit of the series. + // + T a = (n + 1) / 2; + T x = delta * delta * t * t / (2 * (t * t + n)); + T summit = (sqrt(x * (4 * a + x)) + x) / 2; + if (summit < 40) + return non_central_t_pdf_hypergeometric(t, n, delta, pol); + // + // Otherwise, for t < 0 we have to use the reflection formula: + // + if (t < 0) + { + t = -t; + delta = -delta; + } + // // x and y are the corresponding random // variables for the noncentral beta distribution, // with y = 1 - x: // - T x = t * t / (n + t * t); + x = t * t / (n + t * t); T y = n / (n + t * t); - T a = 0.5f; + a = 0.5f; T b = n / 2; T d2 = delta * delta; // @@ -543,12 +612,22 @@ namespace boost BOOST_MATH_INSTRUMENT_VARIABLE(dt); T result = non_central_beta_pdf(a, b, d2, x, y, pol); BOOST_MATH_INSTRUMENT_VARIABLE(result); - T tol = tools::epsilon() * result * 500; + T tol = tools::root_epsilon() * result; result = non_central_t2_pdf(n, delta, x, y, pol, result); BOOST_MATH_INSTRUMENT_VARIABLE(result); - if(result <= tol) - result = 0; result *= dt; + if (result <= tol) + { + // More than half the digits in the result have cancelled, + // Try direct integration... super slow but reliable as far as we can tell... + if (delta < 0) + { + // reflect back: + delta = -delta; + t = -t; + } + result = non_central_t_pdf_integral(t, n, delta, pol); + } return result; } diff --git a/test/test_nc_t.cpp b/test/test_nc_t.cpp index d9a8d4ed7..8a12635de 100644 --- a/test/test_nc_t.cpp +++ b/test/test_nc_t.cpp @@ -113,6 +113,20 @@ void expected_results() largest_type, // test type(s) "[^|]*", // test data group "[^|]*", 400, 100); // test function + add_expected_result( + "[^|]*", // compiler + "[^|]*", // stdlib + "[^|]*", // platform + "double", // test type(s) + "[^|]*PDF", // test data group + "[^|]*", static_cast(1 / boost::math::tools::root_epsilon()), static_cast(1 / boost::math::tools::root_epsilon())); // test function + add_expected_result( + "[^|]*", // compiler + "[^|]*", // stdlib + "[^|]*", // platform + "long double", // test type(s) + "[^|]*PDF", // test data group + "[^|]*", static_cast(1 / boost::math::tools::root_epsilon()), static_cast(1 / boost::math::tools::root_epsilon())); // test function add_expected_result( "[^|]*", // compiler "[^|]*", // stdlib diff --git a/test/test_nc_t.hpp b/test/test_nc_t.hpp index b4fd17fb7..65af7e647 100644 --- a/test/test_nc_t.hpp +++ b/test/test_nc_t.hpp @@ -353,6 +353,12 @@ T nct_cdf(T df, T nc, T x) return cdf(boost::math::non_central_t_distribution(df, nc), x); } +template +T nct_pdf(T df, T nc, T x) +{ + return pdf(boost::math::non_central_t_distribution(df, nc), x); +} + template T nct_ccdf(T df, T nc, T x) { @@ -399,6 +405,27 @@ void do_test_nc_t(T& data, const char* type_name, const char* test) #endif } +template +void do_test_nc_t_pdf(T& data, const char* type_name, const char* test) +{ + typedef Real value_type; + + std::cout << "Testing: " << test << std::endl; + + value_type(*fp1)(value_type, value_type, value_type) = nct_pdf; + + boost::math::tools::test_result result; + + result = boost::math::tools::test_hetero( + data, + bind_func(fp1, 0, 1, 2), + extract_result(3)); + handle_test_result(result, data[result.worst()], result.worst(), + type_name, "non central t PDF", test); + + std::cout << std::endl; +} + template void quantile_sanity_check(T& data, const char* type_name, const char* test) { @@ -522,6 +549,9 @@ void test_accuracy(T, const char* type_name) #include "nct_asym.ipp" do_test_nc_t(nct_asym, type_name, "Non Central T (large parameters)"); quantile_sanity_check(nct_asym, type_name, "Non Central T (large parameters)"); + +#include "nc_t_pdf_data.ipp" + do_test_nc_t_pdf(nc_t_pdf_data, type_name, "Non Central T PDF"); } } diff --git a/tools/non_central_t_pdf_data.cpp b/tools/non_central_t_pdf_data.cpp new file mode 100644 index 000000000..7d50f0080 --- /dev/null +++ b/tools/non_central_t_pdf_data.cpp @@ -0,0 +1,92 @@ +// (C) Copyright John Maddock 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_MAX_SERIES_ITERATION_POLICY 10000000 +#define BOOST_MATH_USE_MPFR + +#include "mp_t.hpp" +#include +#include +#include +#include +#include +#include +#include + +using namespace boost::math::tools; +using namespace boost::math; +using namespace std; + +struct nc_t_pdf_gen +{ + mp_t operator()(mp_t v, mp_t mu, mp_t x) + { + // + // Arbitrary smallest value we accept, below this, we will likely underflow to zero + // when computing at double precision: + // + static const mp_t minimum = 1e-270; + + mp_t Av = boost::math::hypergeometric_1F1((v + 1) / 2, boost::math::constants::half(), mu * mu * x * x / (2 * (x * x + v))); + mp_t Bv = boost::math::hypergeometric_1F1(v / 2 + mp_t(1), mp_t(3) / 2, mu * mu * x * x / (2 * (x * x + v))); + Bv *= boost::math::tgamma_ratio(v / 2 + mp_t(1), (v + mp_t(1)) / 2); + Bv *= boost::math::constants::root_two() * mu * x / sqrt(x * x + v); + + Av += Bv; + Av *= exp(-mu * mu / 2) * pow(1 + x * x / v, -(v + 1) / 2) * boost::math::tgamma_ratio((v + mp_t(1)) / 2, v / 2); + Av /= sqrt(v) * boost::math::constants::root_pi(); + + if (Av < minimum) + throw std::domain_error("Result too small!"); + + return Av; + } +}; + + +int main(int, char* []) +{ + parameter_info arg1, arg2, arg3; + test_data data; + + std::cout << "Welcome.\n" + "This program will generate spot tests for the non central t PDF:\n"; + + std::string line; + bool cont; + + random_ns::mt19937 rnd; + random_ns::uniform_real_distribution ur_a(0, 20); + + + do + { + if (0 == get_user_parameter_info(arg1, "v")) + return 1; + if (0 == get_user_parameter_info(arg2, "nc")) + return 1; + + arg3 = make_periodic_param(mp_t(-20), mp_t(200), 170); + + data.insert(nc_t_pdf_gen(), arg1, arg2, arg3); + std::cout << "Any more data [y/n]?"; + std::getline(std::cin, line); + boost::algorithm::trim(line); + cont = (line == "y"); + } while (cont); + + std::cout << "Enter name of test data file [default=nc_t_pdf_data.ipp]"; + std::getline(std::cin, line); + boost::algorithm::trim(line); + if (line == "") + line = "nc_t_pdf_data.ipp"; + std::ofstream ofs(line.c_str()); + ofs << std::scientific << std::setprecision(40); + write_code(ofs, data, line.c_str()); + + return 0; +} + +