diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index 6f9b542b7..6d448ad52 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -45,6 +45,12 @@ namespace boost{ namespace math{ namespace tools { template auto kurtosis(ForwardIterator first, ForwardIterator last); + template + auto excess_kurtosis(Container const & c); + + template + auto excess_kurtosis(ForwardIterator first, ForwardIterator last); + template auto first_four_moments(Container const & c); @@ -150,7 +156,8 @@ The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-contr The input data must be forward iterable and must consist of real or integral values. If the input data is integral, the output is a double precision float. Note that this is /not/ the excess kurtosis. -If you require the excess kurtosis, subtract 3 from the kurtosis. +If you require the excess kurtosis, use `boost::math::tools::excess_kurtosis`. +This function simply subtracts 3 from the kurtosis, but it makes eminently clear our definition of kurtosis. [heading First four moments] diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index ca3411c43..0c5b2dca8 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -241,6 +241,18 @@ inline auto kurtosis(Container const & v) return kurtosis(v.cbegin(), v.cend()); } +template +auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template +inline auto excess_kurtosis(Container const & v) +{ + return excess_kurtosis(v.cbegin(), v.cend()); +} + // Follows equation 1.5/1.6 of: // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index 62cc74e86..a4baa0e9b 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -220,6 +220,35 @@ void test_kurtosis() kurt = boost::math::tools::kurtosis(v2); BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); + std::vector v3(10000); + std::mt19937 gen(42); + std::normal_distribution dis(0, 1); + for (size_t i = 0; i < v3.size(); ++i) { + v3[i] = dis(gen); + } + kurt = boost::math::tools::kurtosis(v3); + BOOST_TEST(abs(kurt - 3) < 0.1); + + std::uniform_real_distribution udis(-1, 3); + for (size_t i = 0; i < v3.size(); ++i) { + v3[i] = udis(gen); + } + auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3); + BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2); + + + // This test only passes when there are a large number of samples. + // Otherwise, the distribution doesn't generate enough outliers to give, + // or generates too many, giving pretty wildly different values of kurtosis on different runs. + // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run. + // The CI system, however, would die on a million long doubles. + //std::exponential_distribution edis(0.1); + //for (size_t i = 0; i < v3.size(); ++i) { + // v3[i] = edis(gen); + //} + //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3; + //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2); + } template @@ -427,7 +456,8 @@ int main() test_kurtosis(); test_kurtosis(); test_kurtosis(); - test_kurtosis(); + // Kinda expensive: + //test_kurtosis(); test_first_four_moments(); test_first_four_moments();