Add unit tests demonstrating that the kurtosis of various continuous distributions is calculated correctly. Add excess_kurtosis to make clear the working definition of kurtosis. [CI SKIP]

2026-01-19 04:22:09 +00:00 · 2018-12-25 23:00:46 -07:00
parent 53ba643528
commit 4848accb77
3 changed files with 51 additions and 2 deletions
--- a/doc/vector_functionals/univariate_statistics.qbk
+++ b/doc/vector_functionals/univariate_statistics.qbk
@@ -45,6 +45,12 @@ namespace boost{ namespace math{ namespace tools {
    template<class ForwardIterator>
    auto kurtosis(ForwardIterator first, ForwardIterator last);

+    template<class Container>
+    auto excess_kurtosis(Container const & c);
+
+    template<class ForwardIterator>
+    auto excess_kurtosis(ForwardIterator first, ForwardIterator last);
+
    template<class Container>
    auto first_four_moments(Container const & c);

@@ -150,7 +156,8 @@ The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-contr
 The input data must be forward iterable and must consist of real or integral values.
 If the input data is integral, the output is a double precision float.
 Note that this is /not/ the excess kurtosis.
-If you require the excess kurtosis, subtract 3 from the kurtosis.
+If you require the excess kurtosis, use `boost::math::tools::excess_kurtosis`.
+This function simply subtracts 3 from the kurtosis, but it makes eminently clear our definition of kurtosis.

 [heading First four moments]

--- a/include/boost/math/tools/univariate_statistics.hpp
+++ b/include/boost/math/tools/univariate_statistics.hpp
@@ -241,6 +241,18 @@ inline auto kurtosis(Container const & v)
    return kurtosis(v.cbegin(), v.cend());
 }

+template<class ForwardIterator>
+auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
+{
+    return kurtosis(first, last) - 3;
+}
+
+template<class Container>
+inline auto excess_kurtosis(Container const & v)
+{
+    return excess_kurtosis(v.cbegin(), v.cend());
+}
+

 // Follows equation 1.5/1.6 of:
 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
--- a/test/univariate_statistics_test.cpp
+++ b/test/univariate_statistics_test.cpp
@@ -220,6 +220,35 @@ void test_kurtosis()
    kurt = boost::math::tools::kurtosis(v2);
    BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol);

+    std::vector<Real> v3(10000);
+    std::mt19937 gen(42);
+    std::normal_distribution<long double> dis(0, 1);
+    for (size_t i = 0; i < v3.size(); ++i) {
+        v3[i] = dis(gen);
+    }
+    kurt = boost::math::tools::kurtosis(v3);
+    BOOST_TEST(abs(kurt - 3) < 0.1);
+
+    std::uniform_real_distribution<long double> udis(-1, 3);
+    for (size_t i = 0; i < v3.size(); ++i) {
+        v3[i] = udis(gen);
+    }
+    auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3);
+    BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2);
+
+
+    // This test only passes when there are a large number of samples.
+    // Otherwise, the distribution doesn't generate enough outliers to give,
+    // or generates too many, giving pretty wildly different values of kurtosis on different runs.
+    // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run.
+    // The CI system, however, would die on a million long doubles.
+    //std::exponential_distribution<long double> edis(0.1);
+    //for (size_t i = 0; i < v3.size(); ++i) {
+    //    v3[i] = edis(gen);
+    //}
+    //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3;
+    //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2);
+
 }

 template<class Z>
@@ -427,7 +456,8 @@ int main()
    test_kurtosis<float>();
    test_kurtosis<double>();
    test_kurtosis<long double>();
-    test_kurtosis<cpp_bin_float_50>();
+    // Kinda expensive:
+    //test_kurtosis<cpp_bin_float_50>();

    test_first_four_moments<float>();
    test_first_four_moments<double>();