diff --git a/doc/vector_functionals/bivariate_statistics.qbk b/doc/vector_functionals/bivariate_statistics.qbk index 23b4a9ecf..8bb3aece0 100644 --- a/doc/vector_functionals/bivariate_statistics.qbk +++ b/doc/vector_functionals/bivariate_statistics.qbk @@ -11,9 +11,9 @@ [heading Synopsis] `` -#include +#include -namespace boost{ namespace math{ namespace tools { +namespace boost{ namespace math{ namespace statistics { template auto covariance(Container const & u, Container const & v); @@ -37,7 +37,7 @@ Computes the population covariance of two datasets: std::vector u{1,2,3,4,5}; std::vector v{1,2,3,4,5}; - double cov_uv = boost::math::tools::covariance(u, v); + double cov_uv = boost::math::statistics::covariance(u, v); The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al]. The data is not modified. Requires a random-access container. @@ -49,7 +49,7 @@ As such, we provide `means_and_covariance`: std::vector u{1,2,3,4,5}; std::vector v{1,2,3,4,5}; - auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_covariance(u, v); + auto [mu_u, mu_v, cov_uv] = boost::math::statistics::means_and_covariance(u, v); [heading Correlation Coefficient] @@ -57,7 +57,7 @@ Computes the [@https://en.wikipedia.org/wiki/Pearson_correlation_coefficient Pea std::vector u{1,2,3,4,5}; std::vector v{1,2,3,4,5}; - double rho_uv = boost::math::tools::correlation_coefficient(u, v); + double rho_uv = boost::math::statistics::correlation_coefficient(u, v); // rho_uv = 1. The data must be random access and cannot be complex. diff --git a/doc/vector_functionals/signal_statistics.qbk b/doc/vector_functionals/signal_statistics.qbk index 5ef0e1db2..b5303dde5 100644 --- a/doc/vector_functionals/signal_statistics.qbk +++ b/doc/vector_functionals/signal_statistics.qbk @@ -11,9 +11,9 @@ [heading Synopsis] `` -#include +#include -namespace boost::math::tools { +namespace boost::math::statistics { template auto absolute_gini_coefficient(Container & c); @@ -56,7 +56,7 @@ namespace boost::math::tools { [heading Description] -The file `boost/math/tools/signal_statistics.hpp` is a set of facilities for computing quantities commonly used in signal analysis. +The file `boost/math/statistics/signal_statistics.hpp` is a set of facilities for computing quantities commonly used in signal analysis. Our examples use `std::vector` to hold the data, but this not required. In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`. diff --git a/doc/vector_functionals/univariate_statistics.qbk b/doc/vector_functionals/univariate_statistics.qbk index e07430530..20dc50a6f 100644 --- a/doc/vector_functionals/univariate_statistics.qbk +++ b/doc/vector_functionals/univariate_statistics.qbk @@ -11,9 +11,9 @@ [heading Synopsis] `` -#include +#include -namespace boost{ namespace math{ namespace tools { +namespace boost{ namespace math{ namespace statistics { template auto mean(Container const & c); @@ -33,6 +33,9 @@ namespace boost{ namespace math{ namespace tools { template auto sample_variance(ForwardIterator first, ForwardIterator last); + template + auto mean_and_sample_variance(Container const & c); + template auto skewness(Container const & c); @@ -104,9 +107,9 @@ For certain operations (total variation, for example) integer inputs are support [heading Mean] std::vector v{1,2,3,4,5}; - double mu = boost::math::tools::mean(v.cbegin(), v.cend()); + double mu = boost::math::statistics::mean(v.cbegin(), v.cend()); // Alternative syntax if you want to use entire container: - mu = boost::math::tools::mean(v); + mu = boost::math::statistics::mean(v); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6a]. The data is not modified and must be forward iterable. @@ -116,23 +119,23 @@ If the input is an integer type, the output is a double precision float. [heading Variance] std::vector v{1,2,3,4,5}; - Real sigma_sq = boost::math::tools::variance(v.cbegin(), v.cend()); + Real sigma_sq = boost::math::statistics::variance(v.cbegin(), v.cend()); If you don't need to calculate on a subset of the input, then the range call is more terse: std::vector v{1,2,3,4,5}; - Real sigma_sq = boost::math::tools::variance(v); + Real sigma_sq = boost::math::statistics::variance(v); The implementation follows [@https://doi.org/10.1137/1.9780898718027 Higham 1.6b]. The input data must be forward iterable and the range `[first, last)` must contain at least two elements. It is /not/ in general sensible to pass complex numbers to this routine. If integers are passed as input, then the output is a double precision float. -`boost::math::tools::variance` returns the population variance. +`boost::math::statistics::variance` returns the population variance. If you want a sample variance, use std::vector v{1,2,3,4,5}; - Real sn_sq = boost::math::tools::sample_variance(v); + Real sn_sq = boost::math::statistics::sample_variance(v); [heading Skewness] @@ -140,7 +143,7 @@ If you want a sample variance, use Computes the skewness of a dataset: std::vector v{1,2,3,4,5}; - double skewness = boost::math::tools::skewness(v); + double skewness = boost::math::statistics::skewness(v); // skewness = 0. The input vector is not modified, works with integral and real data. @@ -155,14 +158,14 @@ The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-contr Computes the kurtosis of a dataset: std::vector v{1,2,3,4,5}; - double kurtosis = boost::math::tools::kurtosis(v); + double kurtosis = boost::math::statistics::kurtosis(v); // kurtosis = 17/10 The implementation follows [@https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf Pebay]. The input data must be forward iterable and must consist of real or integral values. If the input data is integral, the output is a double precision float. Note that this is /not/ the excess kurtosis. -If you require the excess kurtosis, use `boost::math::tools::excess_kurtosis`. +If you require the excess kurtosis, use `boost::math::statistics::excess_kurtosis`. This function simply subtracts 3 from the kurtosis, but it makes eminently clear our definition of kurtosis. [heading First four moments] @@ -170,7 +173,7 @@ This function simply subtracts 3 from the kurtosis, but it makes eminently clear Simultaneously computes the first four [@https://en.wikipedia.org/wiki/Central_moment central moments] in a single pass through the data: std::vector v{1,2,3,4,5}; - auto [M1, M2, M3, M4] = boost::math::tools::first_four_moments(v); + auto [M1, M2, M3, M4] = boost::math::statistics::first_four_moments(v); [heading Median] @@ -178,7 +181,7 @@ Simultaneously computes the first four [@https://en.wikipedia.org/wiki/Central_m Computes the median of a dataset: std::vector v{1,2,3,4,5}; - double m = boost::math::tools::median(v.begin(), v.end()); + double m = boost::math::statistics::median(v.begin(), v.end()); /Nota bene: The input vector is modified./ The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. @@ -190,7 +193,7 @@ In particular, the container must allow random access. Computes the [@https://en.wikipedia.org/wiki/Median_absolute_deviation median absolute deviation] of a dataset: std::vector v{1,2,3,4,5}; - double mad = boost::math::tools::median_absolute_deviation(v); + double mad = boost::math::statistics::median_absolute_deviation(v); By default, the deviation from the median is used. If you have some prior that the median is zero, or wish to compute the median absolute deviation from the mean, @@ -198,11 +201,11 @@ use the following: // prior is that center is zero: double center = 0; - double mad = boost::math::tools::median_absolute_deviation(v, center); + double mad = boost::math::statistics::median_absolute_deviation(v, center); // compute median absolute deviation from the mean: - double mu = boost::math::tools::mean(v); - double mad = boost::math::tools::median_absolute_deviation(v, mu); + double mu = boost::math::statistics::mean(v); + double mad = boost::math::statistics::median_absolute_deviation(v, mu); /Nota bene:/ The input vector is modified. Again the vector is passed into a call to [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`]. @@ -212,12 +215,12 @@ Again the vector is passed into a call to [@https://en.cppreference.com/w/cpp/al Compute the Gini coefficient of a dataset: std::vector v{1,0,0,0}; - double gini = boost::math::tools::gini_coefficient(v); + double gini = boost::math::statistics::gini_coefficient(v); // gini = 3/4 - double s_gini = boost::math::tools::sample_gini_coefficient(v); + double s_gini = boost::math::statistics::sample_gini_coefficient(v); // s_gini = 1. std::vector w{1,1,1,1}; - gini = boost::math::tools::gini_coefficient(w.begin(), w.end()); + gini = boost::math::statistics::gini_coefficient(w.begin(), w.end()); // gini = 0, as all elements are now equal. /Nota bene/: The input data is altered: in particular, it is sorted. Makes a call to `std::sort`, and as such requires random access iterators. diff --git a/include/boost/math/statistics/bivariate_statistics.hpp b/include/boost/math/statistics/bivariate_statistics.hpp new file mode 100644 index 000000000..6840ea7ab --- /dev/null +++ b/include/boost/math/statistics/bivariate_statistics.hpp @@ -0,0 +1,96 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP +#define BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP + +#include +#include +#include + + +namespace boost{ namespace math{ namespace statistics { + +template +auto means_and_covariance(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample."); + + // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. + Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = (u[i] - mu_u)/(i+1); + Real v_tmp = v[i] - mu_v; + cov += i*u_tmp*v_tmp; + mu_u = mu_u + u_tmp; + mu_v = mu_v + v_tmp/(i+1); + } + + return std::make_tuple(mu_u, mu_v, cov/size(u)); +} + +template +auto covariance(Container const & u, Container const & v) +{ + auto [mu_u, mu_v, cov] = boost::math::statistics::means_and_covariance(u, v); + return cov; +} + +template +auto correlation_coefficient(Container const & u, Container const & v) +{ + using Real = typename Container::value_type; + using std::size; + BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); + BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples."); + + Real cov = 0; + Real mu_u = u[0]; + Real mu_v = v[0]; + Real Qu = 0; + Real Qv = 0; + + for(size_t i = 1; i < size(u); ++i) + { + Real u_tmp = u[i] - mu_u; + Real v_tmp = v[i] - mu_v; + Qu = Qu + (i*u_tmp*u_tmp)/(i+1); + Qv = Qv + (i*v_tmp*v_tmp)/(i+1); + cov += i*u_tmp*v_tmp/(i+1); + mu_u = mu_u + u_tmp/(i+1); + mu_v = mu_v + v_tmp/(i+1); + } + + // If both datasets are constant, then they are perfectly correlated. + if (Qu == 0 && Qv == 0) + { + return Real(1); + } + // If one dataset is constant and the other isn't, then they have no correlation: + if (Qu == 0 || Qv == 0) + { + return Real(0); + } + + // Make sure rho in [-1, 1], even in the presence of numerical noise. + Real rho = cov/sqrt(Qu*Qv); + if (rho > 1) { + rho = 1; + } + if (rho < -1) { + rho = -1; + } + return rho; +} + +}}} +#endif diff --git a/include/boost/math/statistics/signal_statistics.hpp b/include/boost/math/statistics/signal_statistics.hpp new file mode 100644 index 000000000..17b50645c --- /dev/null +++ b/include/boost/math/statistics/signal_statistics.hpp @@ -0,0 +1,343 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP +#define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP + +#include +#include +#include +#include +#include +#include + + +namespace boost::math::statistics { + +template +auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + using std::abs; + using RealOrComplex = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + + + decltype(abs(*first)) i = 1; + decltype(abs(*first)) num = 0; + decltype(abs(*first)) denom = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + num += tmp*i; + denom += tmp; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + decltype(abs(*first)) zero = 0; + return zero; + } + return ((2*num)/denom - i)/(i-1); +} + +template +inline auto absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::statistics::absolute_gini_coefficient(v.begin(), v.end()); +} + +template +auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + return n*boost::math::statistics::absolute_gini_coefficient(first, last)/(n-1); +} + +template +inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::statistics::sample_absolute_gini_coefficient(v.begin(), v.end()); +} + + +// The Hoyer sparsity measure is defined in: +// https://arxiv.org/pdf/0811.4706.pdf +template +auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) +{ + using T = typename std::iterator_traits::value_type; + using std::abs; + using std::sqrt; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples."); + + if constexpr (std::is_unsigned::value) + { + T l1 = 0; + T l2 = 0; + size_t n = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + l2 += (*it)*(*it); + n += 1; + } + + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else { + decltype(abs(*first)) l1 = 0; + decltype(abs(*first)) l2 = 0; + // We wouldn't need to count the elements if it was a random access iterator, + // but our only constraint is that it's a forward iterator. + size_t n = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + l1 += tmp; + l2 += tmp*tmp; + n += 1; + } + if constexpr (std::is_integral::value) + { + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else + { + decltype(abs(*first)) rootn = sqrt(static_cast(n)); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + } +} + +template +inline auto hoyer_sparsity(Container const & v) +{ + return boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); +} + + +template +auto oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), + "Signal and noisy_signal must be have the same number of elements."); + if constexpr (std::is_integral::value) + { + double numerator = 0; + double denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += (noisy_signal[i] - signal[i])*(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + return numerator/denominator; + } + else if constexpr (boost::math::tools::is_complex_type::value) + + { + using std::norm; + typename Real::value_type numerator = 0; + typename Real::value_type denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += norm(signal[i]); + denominator += norm(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } + else + { + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + } +} + +template +auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements."); + + Real mu = boost::math::statistics::mean(signal); + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + Real tmp = signal[i] - mu; + numerator += tmp*tmp; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits::infinity(); + } + + return numerator/denominator; + +} + +template +auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::statistics::mean_invariant_oracle_snr(signal, noisy_signal)); +} + + +// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. +template +auto oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::statistics::oracle_snr(signal, noisy_signal)); +} + +// A good reference on the M2M4 estimator: +// D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. +// A nice python implementation: +// https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py +template +auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + BOOST_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive"); + BOOST_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive."); + using Real = typename std::iterator_traits::value_type; + using std::sqrt; + if constexpr (std::is_floating_point::value || std::numeric_limits::max_exponent) + { + // If we first eliminate N, we obtain the quadratic equation: + // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0 + // If we first eliminate S, we obtain the quadratic equation: + // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 + // I believe these equations are totally independent quadratics; + // if one has a complex solution it is not necessarily the case that the other must also. + // However, I can't prove that, so there is a chance that this does unnecessary work. + // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here. + // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711 + auto [M1, M2, M3, M4] = boost::math::statistics::first_four_moments(first, last); + if (M4 == 0) + { + // The signal is constant. There is no noise: + return std::numeric_limits::infinity(); + } + // Change to notation in Pauluzzi, equation 41: + auto kw = estimated_noise_kurtosis; + auto ka = estimated_signal_kurtosis; + // A common case, since it's the default: + Real a = (ka+kw-6); + Real bs = 2*M2*(3-kw); + Real cs = kw*M2*M2 - M4; + Real bn = 2*M2*(3-ka); + Real cn = ka*M2*M2 - M4; + auto [S0, S1] = boost::math::tools::quadratic_roots(a, bs, cs); + if (S1 > 0) + { + auto N = M2 - S1; + if (N > 0) + { + return S1/N; + } + if (S0 > 0) + { + N = M2 - S0; + if (N > 0) + { + return S0/N; + } + } + } + auto [N0, N1] = boost::math::tools::quadratic_roots(a, bn, cn); + if (N1 > 0) + { + auto S = M2 - N1; + if (S > 0) + { + return S/N1; + } + if (N0 > 0) + { + S = M2 - N0; + if (S > 0) + { + return S/N0; + } + } + } + // This happens distressingly often. It's a limitation of the method. + return std::numeric_limits::quiet_NaN(); + } + else + { + BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); + return std::numeric_limits::quiet_NaN(); + } +} + +template +inline auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + return m2m4_snr_estimator(noisy_signal.cbegin(), noisy_signal.cend(), estimated_signal_kurtosis, estimated_noise_kurtosis); +} + +template +inline auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(first, last, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + + +template +inline auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + +} +#endif diff --git a/include/boost/math/statistics/t_test.hpp b/include/boost/math/statistics/t_test.hpp new file mode 100644 index 000000000..98eaa2651 --- /dev/null +++ b/include/boost/math/statistics/t_test.hpp @@ -0,0 +1,21 @@ +// (C) Copyright Nick Thompson 2019. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_T_TEST_HPP +#define BOOST_MATH_STATISTICS_T_TEST_HPP + +#include + +namespace boost::math::statistics { + +template +auto one_sample_t_test_statistic(RandomAccessContainer const & v, typename RandomAccessContainer::value_type assumed_mean) { + using Real = typename RandomAccessContainer::value_type; + auto [mu, s_sq] = mean_and_sample_variance(v.begin(), v.end()); + return (mu - assumed_mean)/sqrt(s_sq/v.size()); +} + +} +#endif diff --git a/include/boost/math/statistics/univariate_statistics.hpp b/include/boost/math/statistics/univariate_statistics.hpp new file mode 100644 index 000000000..424568014 --- /dev/null +++ b/include/boost/math/statistics/univariate_statistics.hpp @@ -0,0 +1,462 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP +#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP + +#include +#include +#include + +namespace boost::math::statistics { + +template +auto mean(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + if constexpr (std::is_integral::value) + { + double mu = 0; + double i = 1; + for(auto it = first; it != last; ++it) { + mu = mu + (*it - mu)/i; + i += 1; + } + return mu; + } + else if constexpr (std::is_same_v::iterator_category, std::random_access_iterator_tag>) + { + size_t elements = std::distance(first, last); + Real mu0 = 0; + Real mu1 = 0; + Real mu2 = 0; + Real mu3 = 0; + Real i = 1; + auto end = last - (elements % 4); + for(auto it = first; it != end; it += 4) { + Real inv = Real(1)/i; + Real tmp0 = (*it - mu0); + Real tmp1 = (*(it+1) - mu1); + Real tmp2 = (*(it+2) - mu2); + Real tmp3 = (*(it+3) - mu3); + // please generate a vectorized fma here + mu0 += tmp0*inv; + mu1 += tmp1*inv; + mu2 += tmp2*inv; + mu3 += tmp3*inv; + i += 1; + } + Real num1 = Real(elements - (elements %4))/Real(4); + Real num2 = num1 + Real(elements % 4); + + for (auto it = end; it != last; ++it) + { + mu3 += (*it-mu3)/i; + i += 1; + } + + return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements); + } + else + { + auto it = first; + Real mu = *it; + Real i = 2; + while(++it != last) + { + mu += (*it - mu)/i; + i += 1; + } + return mu; + } +} + +template +inline auto mean(Container const & v) +{ + return mean(v.cbegin(), v.cend()); +} + +template +auto variance(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); + // Higham, Accuracy and Stability, equation 1.6a and 1.6b: + if constexpr (std::is_integral::value) + { + double M = *first; + double Q = 0; + double k = 2; + for (auto it = std::next(first); it != last; ++it) + { + double tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + return Q/(k-1); + } + else + { + Real M = *first; + Real Q = 0; + Real k = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real tmp = (*it - M)/k; + Q += k*(k-1)*tmp*tmp; + M += tmp; + k += 1; + } + return Q/(k-1); + } +} + +template +inline auto variance(Container const & v) +{ + return variance(v.cbegin(), v.cend()); +} + +template +auto sample_variance(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(first, last)/(n-1); +} + +template +inline auto sample_variance(Container const & v) +{ + return sample_variance(v.cbegin(), v.cend()); +} + +template +auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance."); + // Higham, Accuracy and Stability, equation 1.6a and 1.6b: + if constexpr (std::is_integral::value) + { + double M = *first; + double Q = 0; + double k = 2; + for (auto it = std::next(first); it != last; ++it) + { + double tmp = *it - M; + Q = Q + ((k-1)*tmp*tmp)/k; + M = M + tmp/k; + k += 1; + } + return std::pair{M, Q/(k-2)}; + } + else + { + Real M = *first; + Real Q = 0; + Real k = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real tmp = (*it - M)/k; + Q += k*(k-1)*tmp*tmp; + M += tmp; + k += 1; + } + return std::pair{M, Q/(k-2)}; + } +} + + + +// Follows equation 1.5 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto skewness(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double n = 2; + for (auto it = std::next(first); it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + double var = M2/(n-1); + if (var == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return double(0); + } + double skew = M3/(M2*sqrt(var)); + return skew; + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real n = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 += tmp*(n-1)*delta21; + M1 += tmp; + n += 1; + } + + Real var = M2/(n-1); + if (var == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return Real(0); + } + Real skew = M3/(M2*sqrt(var)); + return skew; + } +} + +template +inline auto skewness(Container const & v) +{ + return skewness(v.cbegin(), v.cend()); +} + +// Follows equation 1.5/1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto first_four_moments(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments."); + if constexpr (std::is_integral::value) + { + double M1 = *first; + double M2 = 0; + double M3 = 0; + double M4 = 0; + double n = 2; + for (auto it = std::next(first); it != last; ++it) + { + double delta21 = *it - M1; + double tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); + } + else + { + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real M4 = 0; + Real n = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1)); + } +} + +template +inline auto first_four_moments(Container const & v) +{ + return first_four_moments(v.cbegin(), v.cend()); +} + + +// Follows equation 1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template +auto kurtosis(ForwardIterator first, ForwardIterator last) +{ + auto [M1, M2, M3, M4] = first_four_moments(first, last); + if (M2 == 0) + { + return M2; + } + return M4/(M2*M2); +} + +template +inline auto kurtosis(Container const & v) +{ + return kurtosis(v.cbegin(), v.cend()); +} + +template +auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template +inline auto excess_kurtosis(Container const & v) +{ + return excess_kurtosis(v.cbegin(), v.cend()); +} + + +template +auto median(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + + +template +inline auto median(RandomAccessContainer & v) +{ + return median(v.begin(), v.end()); +} + +template +auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + using Real = typename std::iterator_traits::value_type; + BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last); + if constexpr (std::is_integral::value) + { + double i = 1; + double num = 0; + double denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return double(0); + } + + return ((2*num)/denom - i)/(i-1); + } + else + { + Real i = 1; + Real num = 0; + Real denom = 0; + for (auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + return Real(0); + } + + return ((2*num)/denom - i)/(i-1); + } +} + +template +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(v.begin(), v.end()); +} + +template +inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + size_t n = std::distance(first, last); + return n*gini_coefficient(first, last)/(n-1); +} + +template +inline auto sample_gini_coefficient(RandomAccessContainer & v) +{ + return sample_gini_coefficient(v.begin(), v.end()); +} + +template +auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits::value_type center=std::numeric_limits::value_type>::quiet_NaN()) +{ + using std::abs; + using Real = typename std::iterator_traits::value_type; + using std::isnan; + if (isnan(center)) + { + center = boost::math::statistics::median(first, last); + } + size_t num_elems = std::distance(first, last); + BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);}; + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last, comparator); + return abs(*middle); + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last, comparator); + std::nth_element(middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast(2)); + } +} + +template +inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits::quiet_NaN()) +{ + return median_absolute_deviation(v.begin(), v.end(), center); +} + +} +#endif diff --git a/include/boost/math/tools/bivariate_statistics.hpp b/include/boost/math/tools/bivariate_statistics.hpp index 790c3d961..a15e28b49 100644 --- a/include/boost/math/tools/bivariate_statistics.hpp +++ b/include/boost/math/tools/bivariate_statistics.hpp @@ -9,7 +9,9 @@ #include #include #include +#include +BOOST_HEADER_DEPRECATED(""); namespace boost{ namespace math{ namespace tools { diff --git a/include/boost/math/tools/signal_statistics.hpp b/include/boost/math/tools/signal_statistics.hpp index 1ff2dd283..9dc0fc4bf 100644 --- a/include/boost/math/tools/signal_statistics.hpp +++ b/include/boost/math/tools/signal_statistics.hpp @@ -11,8 +11,10 @@ #include #include #include -#include +#include +#include +BOOST_HEADER_DEPRECATED(""); namespace boost::math::tools { diff --git a/include/boost/math/tools/univariate_statistics.hpp b/include/boost/math/tools/univariate_statistics.hpp index 61b7e5d68..c6dc294df 100644 --- a/include/boost/math/tools/univariate_statistics.hpp +++ b/include/boost/math/tools/univariate_statistics.hpp @@ -9,7 +9,9 @@ #include #include #include +#include +BOOST_HEADER_DEPRECATED(""); namespace boost::math::tools { diff --git a/test/bivariate_statistics_test.cpp b/test/bivariate_statistics_test.cpp index 528df4cd2..82feeb727 100644 --- a/test/bivariate_statistics_test.cpp +++ b/test/bivariate_statistics_test.cpp @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -32,8 +32,8 @@ using boost::multiprecision::cpp_complex_50; * 5) Does it work with complex data if complex data is sensible? */ -using boost::math::tools::means_and_covariance; -using boost::math::tools::covariance; +using boost::math::statistics::means_and_covariance; +using boost::math::statistics::covariance; template void test_covariance() @@ -94,10 +94,10 @@ void test_covariance() v[i] = (Real) dis(gen); } - Real mu_u = boost::math::tools::mean(u); - Real mu_v = boost::math::tools::mean(v); - Real sigma_u_sq = boost::math::tools::variance(u); - Real sigma_v_sq = boost::math::tools::variance(v); + Real mu_u = boost::math::statistics::mean(u); + Real mu_v = boost::math::statistics::mean(v); + Real sigma_u_sq = boost::math::statistics::variance(u); + Real sigma_v_sq = boost::math::statistics::variance(v); auto [mu_u_, mu_v_, cov_uv] = means_and_covariance(u, v); BOOST_TEST(abs(mu_u - mu_u_) < tol); @@ -116,7 +116,7 @@ void test_covariance() template void test_correlation_coefficient() { - using boost::math::tools::correlation_coefficient; + using boost::math::statistics::correlation_coefficient; Real tol = std::numeric_limits::epsilon(); std::vector u{1}; diff --git a/test/signal_statistics_test.cpp b/test/signal_statistics_test.cpp index 8fe98b494..79f7b1a1c 100644 --- a/test/signal_statistics_test.cpp +++ b/test/signal_statistics_test.cpp @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -39,24 +39,24 @@ void test_hoyer_sparsity() using std::sqrt; Real tol = 5*std::numeric_limits::epsilon(); std::vector v{1,0,0}; - Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + Real hs = boost::math::statistics::hoyer_sparsity(v.begin(), v.end()); BOOST_TEST(abs(hs - 1) < tol); - hs = boost::math::tools::hoyer_sparsity(v); + hs = boost::math::statistics::hoyer_sparsity(v); BOOST_TEST(abs(hs - 1) < tol); // Does it work with constant iterators? - hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + hs = boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs - 1) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + hs = boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs) < tol); std::array w{1,1,1}; - hs = boost::math::tools::hoyer_sparsity(w); + hs = boost::math::statistics::hoyer_sparsity(w); BOOST_TEST(abs(hs) < tol); // Now some statistics: @@ -69,13 +69,13 @@ void test_hoyer_sparsity() for (size_t i = 0; i < v.size(); ++i) { v[i] = dis(gen); } - hs = boost::math::tools::hoyer_sparsity(v); + hs = boost::math::statistics::hoyer_sparsity(v); Real expected = (1.0 - boost::math::constants::root_three()/2)/(1.0 - 1.0/sqrt(v.size())); BOOST_TEST(abs(expected - hs) < 0.01); // Does it work with a forward list? std::forward_list u1{1, 1, 1}; - hs = boost::math::tools::hoyer_sparsity(u1); + hs = boost::math::statistics::hoyer_sparsity(u1); BOOST_TEST(abs(hs) < tol); // Does it work with a boost ublas vector? @@ -83,7 +83,7 @@ void test_hoyer_sparsity() u2[0] = 1; u2[1] = 1; u2[2] = 1; - hs = boost::math::tools::hoyer_sparsity(u2); + hs = boost::math::statistics::hoyer_sparsity(u2); BOOST_TEST(abs(hs) < tol); } @@ -94,13 +94,13 @@ void test_integer_hoyer_sparsity() using std::sqrt; double tol = 5*std::numeric_limits::epsilon(); std::vector v{1,0,0}; - double hs = boost::math::tools::hoyer_sparsity(v); + double hs = boost::math::statistics::hoyer_sparsity(v); BOOST_TEST(abs(hs - 1) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - hs = boost::math::tools::hoyer_sparsity(v); + hs = boost::math::statistics::hoyer_sparsity(v); BOOST_TEST(abs(hs) < tol); } @@ -112,21 +112,21 @@ void test_complex_hoyer_sparsity() using std::sqrt; Real tol = 5*std::numeric_limits::epsilon(); std::vector v{{0,1}, {0, 0}, {0,0}}; - Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end()); + Real hs = boost::math::statistics::hoyer_sparsity(v.begin(), v.end()); BOOST_TEST(abs(hs - 1) < tol); - hs = boost::math::tools::hoyer_sparsity(v); + hs = boost::math::statistics::hoyer_sparsity(v); BOOST_TEST(abs(hs - 1) < tol); // Does it work with constant iterators? - hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + hs = boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs - 1) < tol); // All are the same magnitude: v[0] = {0, 1}; v[1] = {1, 0}; v[2] = {0,-1}; - hs = boost::math::tools::hoyer_sparsity(v.cbegin(), v.cend()); + hs = boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); BOOST_TEST(abs(hs) < tol); } @@ -134,8 +134,8 @@ void test_complex_hoyer_sparsity() template void test_absolute_gini_coefficient() { - using boost::math::tools::absolute_gini_coefficient; - using boost::math::tools::sample_absolute_gini_coefficient; + using boost::math::statistics::absolute_gini_coefficient; + using boost::math::statistics::sample_absolute_gini_coefficient; Real tol = std::numeric_limits::epsilon(); std::vector v{-1,0,0}; Real gini = sample_absolute_gini_coefficient(v.begin(), v.end()); @@ -207,8 +207,8 @@ void test_oracle_snr() std::vector noisy_signal = signal; noisy_signal[0] += 1; - Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); - Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + Real snr = boost::math::statistics::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::statistics::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -222,8 +222,8 @@ void test_integer_oracle_snr() std::vector noisy_signal = signal; noisy_signal[0] += 1; - double snr = boost::math::tools::oracle_snr(signal, noisy_signal); - double snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + double snr = boost::math::statistics::oracle_snr(signal, noisy_signal); + double snr_db = boost::math::statistics::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -240,8 +240,8 @@ void test_complex_oracle_snr() std::vector noisy_signal = signal; noisy_signal[0] += Complex(1,0); - Real snr = boost::math::tools::oracle_snr(signal, noisy_signal); - Real snr_db = boost::math::tools::oracle_snr_db(signal, noisy_signal); + Real snr = boost::math::statistics::oracle_snr(signal, noisy_signal); + Real snr_db = boost::math::statistics::oracle_snr_db(signal, noisy_signal); BOOST_TEST(abs(snr - length) < tol); BOOST_TEST(abs(snr_db - 10*log10(length)) < tol); } @@ -262,8 +262,8 @@ void test_m2m4_snr_estimator() } // Kurtosis of a sine wave is 1.5: - auto m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5); - auto oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); + auto m2m4_db = boost::math::statistics::m2m4_snr_estimator_db(x, 1.5); + auto oracle_snr_db = boost::math::statistics::mean_invariant_oracle_snr_db(signal, x); BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); std::uniform_real_distribution uni_dis{-1,1}; @@ -273,8 +273,8 @@ void test_m2m4_snr_estimator() } // Kurtosis of continuous uniform distribution over [-1,1] is 1.8: - m2m4_db = boost::math::tools::m2m4_snr_estimator_db(x, 1.5, 1.8); - oracle_snr_db = boost::math::tools::mean_invariant_oracle_snr_db(signal, x); + m2m4_db = boost::math::statistics::m2m4_snr_estimator_db(x, 1.5, 1.8); + oracle_snr_db = boost::math::statistics::mean_invariant_oracle_snr_db(signal, x); // The performance depends on the exact numbers generated by the distribution, but this isn't bad: BOOST_TEST(abs(m2m4_db - oracle_snr_db) < 0.2); @@ -282,12 +282,12 @@ void test_m2m4_snr_estimator() // If x has snr y, then kx should have snr y. Real ka = 1.5; Real kw = 1.8; - auto m2m4 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + auto m2m4 = boost::math::statistics::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); for(size_t i = 0; i < x.size(); ++i) { x[i] *= 4096; } - auto m2m4_2 = boost::math::tools::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); + auto m2m4_2 = boost::math::statistics::m2m4_snr_estimator(x.begin(), x.end(), ka, kw); BOOST_TEST(abs(m2m4 - m2m4_2) < tol); } diff --git a/test/test_t_test.cpp b/test/test_t_test.cpp new file mode 100644 index 000000000..dc5111319 --- /dev/null +++ b/test/test_t_test.cpp @@ -0,0 +1,38 @@ +/* + * Copyright Nick Thompson, 2019 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include "math_unit_test.hpp" +#include +#include + +void test_agreement_with_mathematica() +{ + // Reproduce via: + //data = RandomReal[NormalDistribution[], 128]; + //NumberForm[data, 16] + //NumberForm[TTest[data, 0.0, "TestStatistic"], 16] + //NumberForm[TTest[data, 0.0, "PValue"], 16] + std::vector v{1.270498757865948,-0.7097895555907483,1.151445006538434,0.915648732663531,-0.7480131480881454,-0.6837323220203325,2.362877076786142,2.438188734959438,0.1644154283470843,-0.980857299461513,-0.1448627006957758,0.04901437671768214,-0.3895499730435337,1.412356512608596,-0.3865249523080916,-0.6159322168089271,-0.1865107372684944,-0.152509328597876,1.142603106429423,-1.358368106645048,0.2268475747885975,0.4029249376986136,0.1167407378850566,0.05532794835680535,-1.928794899326586,0.6496438708570567,0.269012797381103,-0.908168796067257,-0.6194990582309883,1.606256899489664,-0.903964536847682,-1.375889704354273,0.04906080087803202,0.2039077019578547,-0.4907377045195846,-0.4781929001716083,-0.2289802280011548,-1.339055086640687,-0.3120811524451416,0.06142580393246503,-0.140496390441262,-0.6482824149508374,-0.2944027976542998,1.619416512991051,0.6285648262611375,1.312636016409526,-1.109965359363169,-0.774547681114892,-0.344875897907528,0.816762481553918,0.1500701005574458,0.807790349338737,-0.2052962007348396,1.057657121384678,0.836142529983228,0.3432803448381389,-0.01268905497569333,-1.144036865790547,-0.4530056923174255,-0.3061160863293071,-0.02963689772198411,-1.33671649419749,-0.06052105439831394,0.973282554859066,-1.643904288065807,-1.0293884110541,-0.5291066659852803,-0.3294227039691209,0.002993387508002654,0.2248580674319177,0.574521694409057,1.041337304293327,0.4078548122453237,0.1112225876991191,-0.6448072259486091,-0.3051345048257077,0.089593933234481,0.4611768867673915,0.7644315320471444,-0.341247840010495,0.0326958894744302,-0.05121900335567795,-0.06019531049352196,1.71234441194424,-0.04175157932686885,0.769694813995503,-1.080913235981393,0.5989354496438777,-0.84416230123901,0.03165655009402087,-0.7502374585144876,-2.734748382516766,1.541068679878993,0.1054620771416859,-0.6543692934553028,1.499220114211276,-0.342006571062175,-0.2053132127077213,0.5457125644270833,-0.7956250897267784,0.7320742348115779,0.4674423735122585,-0.3087396963145776,-1.53764162258267,0.2455449906251891,0.3795993803250636,-0.1195480230909131,0.137639511052913,0.931721348902457,0.06704522870668304,-0.03773030445251862,0.3888322348695948,-0.06366757901233728,0.5563758371320388,-0.7918177216642121,-0.7566297580399533,-0.3740377818446702,-0.6065664299451118,-0.2341124269010213,2.028052675971757,0.378550889251416,0.816911727914731,1.162652387697876,-0.3853743867873177,1.196620648443396,0.01265660717000745,1.816698960862263,-0.972941421015463}; + + + + + double expected_statistic = 0.4587075249160456; + double expected_pvalue = 0.6472282548266728; + + double computed_statistic = boost::math::statistics::one_sample_t_test_statistic(v, 0.0); + std::cout << "Expected statistic = " << expected_statistic << std::hexfloat << " = " << expected_statistic << "\n"; + std::cout << std::defaultfloat; + std::cout << "Computed statistic = " << computed_statistic << std::hexfloat << " = " << computed_statistic << "\n"; +} + + +int main() +{ + test_agreement_with_mathematica(); + return boost::math::test::report_errors(); +} diff --git a/test/univariate_statistics_test.cpp b/test/univariate_statistics_test.cpp index 7552480b3..5cee9888f 100644 --- a/test/univariate_statistics_test.cpp +++ b/test/univariate_statistics_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -104,23 +104,23 @@ void test_integer_mean() { double tol = 100*std::numeric_limits::epsilon(); std::vector v{1,2,3,4,5}; - double mu = boost::math::tools::mean(v); + double mu = boost::math::statistics::mean(v); BOOST_TEST(abs(mu - 3) < tol); // Work with std::array? std::array w{1,2,3,4,5}; - mu = boost::math::tools::mean(w); + mu = boost::math::statistics::mean(w); BOOST_TEST(abs(mu - 3) < tol); v = generate_random_vector(global_size, global_seed); Z scale = 2; - double m1 = scale*boost::math::tools::mean(v); + double m1 = scale*boost::math::statistics::mean(v); for (auto & x : v) { x *= scale; } - double m2 = boost::math::tools::mean(v); + double m2 = boost::math::statistics::mean(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } @@ -140,29 +140,29 @@ void test_mean() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,2,3,4,5}; - Real mu = boost::math::tools::mean(v.begin(), v.end()); + Real mu = boost::math::statistics::mean(v.begin(), v.end()); BOOST_TEST(abs(mu - 3) < tol); // Does range call work? - mu = boost::math::tools::mean(v); + mu = boost::math::statistics::mean(v); BOOST_TEST(abs(mu - 3) < tol); // Can we successfully average only part of the vector? - mu = boost::math::tools::mean(v.begin(), v.begin() + 3); + mu = boost::math::statistics::mean(v.begin(), v.begin() + 3); BOOST_TEST(abs(mu - 2) < tol); // Does it work when we const qualify? - mu = boost::math::tools::mean(v.cbegin(), v.cend()); + mu = boost::math::statistics::mean(v.cbegin(), v.cend()); BOOST_TEST(abs(mu - 3) < tol); // Does it work for std::array? std::array u{1,2,3,4,5,6,7}; - mu = boost::math::tools::mean(u.begin(), u.end()); + mu = boost::math::statistics::mean(u.begin(), u.end()); BOOST_TEST(abs(mu - 4) < tol); // Does it work for a forward iterator? std::forward_list l{1,2,3,4,5,6,7}; - mu = boost::math::tools::mean(l.begin(), l.end()); + mu = boost::math::statistics::mean(l.begin(), l.end()); BOOST_TEST(abs(mu - 4) < tol); // Does it work with ublas vectors? @@ -171,17 +171,17 @@ void test_mean() { w[i] = i+1; } - mu = boost::math::tools::mean(w.cbegin(), w.cend()); + mu = boost::math::statistics::mean(w.cbegin(), w.cend()); BOOST_TEST(abs(mu - 4) < tol); v = generate_random_vector(global_size, global_seed); Real scale = 2; - Real m1 = scale*boost::math::tools::mean(v); + Real m1 = scale*boost::math::statistics::mean(v); for (auto & x : v) { x *= scale; } - Real m2 = boost::math::tools::mean(v); + Real m2 = boost::math::statistics::mean(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); // Stress test: @@ -189,7 +189,7 @@ void test_mean() { v = generate_random_vector(i, 12803); auto naive_ = naive_mean(v); - auto higham_ = boost::math::tools::mean(v); + auto higham_ = boost::math::statistics::mean(v); if (abs(higham_ - naive_) >= 100*tol*abs(naive_)) { std::cout << std::hexfloat; @@ -208,12 +208,12 @@ void test_complex_mean() typedef typename Complex::value_type Real; Real tol = std::numeric_limits::epsilon(); std::vector v{{0,1},{0,2},{0,3},{0,4},{0,5}}; - auto mu = boost::math::tools::mean(v.begin(), v.end()); + auto mu = boost::math::statistics::mean(v.begin(), v.end()); BOOST_TEST(abs(mu.imag() - 3) < tol); BOOST_TEST(abs(mu.real()) < tol); // Does range work? - mu = boost::math::tools::mean(v); + mu = boost::math::statistics::mean(v); BOOST_TEST(abs(mu.imag() - 3) < tol); BOOST_TEST(abs(mu.real()) < tol); } @@ -223,38 +223,38 @@ void test_variance() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1,1,1,1,1}; - Real sigma_sq = boost::math::tools::variance(v.begin(), v.end()); + Real sigma_sq = boost::math::statistics::variance(v.begin(), v.end()); BOOST_TEST(abs(sigma_sq) < tol); - sigma_sq = boost::math::tools::variance(v); + sigma_sq = boost::math::statistics::variance(v); BOOST_TEST(abs(sigma_sq) < tol); - Real s_sq = boost::math::tools::sample_variance(v); + Real s_sq = boost::math::statistics::sample_variance(v); BOOST_TEST(abs(s_sq) < tol); std::vector u{1}; - sigma_sq = boost::math::tools::variance(u.cbegin(), u.cend()); + sigma_sq = boost::math::statistics::variance(u.cbegin(), u.cend()); BOOST_TEST(abs(sigma_sq) < tol); std::array w{0,1,0,1,0,1,0,1}; - sigma_sq = boost::math::tools::variance(w.begin(), w.end()); + sigma_sq = boost::math::statistics::variance(w.begin(), w.end()); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); - sigma_sq = boost::math::tools::variance(w); + sigma_sq = boost::math::statistics::variance(w); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); std::forward_list l{0,1,0,1,0,1,0,1}; - sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + sigma_sq = boost::math::statistics::variance(l.begin(), l.end()); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); v = generate_random_vector(global_size, global_seed); Real scale = 2; - Real m1 = scale*scale*boost::math::tools::variance(v); + Real m1 = scale*scale*boost::math::statistics::variance(v); for (auto & x : v) { x *= scale; } - Real m2 = boost::math::tools::variance(v); + Real m2 = boost::math::statistics::variance(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); // Wikipedia example for a variance of N sided die: @@ -268,7 +268,7 @@ void test_variance() v[i] = i + 1; } - sigma_sq = boost::math::tools::variance(v); + sigma_sq = boost::math::statistics::variance(v); BOOST_TEST(abs(sigma_sq - (n*n-1)/Real(12)) <= tol*sigma_sq); } @@ -280,21 +280,21 @@ void test_integer_variance() { double tol = std::numeric_limits::epsilon(); std::vector v{1,1,1,1,1,1}; - double sigma_sq = boost::math::tools::variance(v); + double sigma_sq = boost::math::statistics::variance(v); BOOST_TEST(abs(sigma_sq) < tol); std::forward_list l{0,1,0,1,0,1,0,1}; - sigma_sq = boost::math::tools::variance(l.begin(), l.end()); + sigma_sq = boost::math::statistics::variance(l.begin(), l.end()); BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol); v = generate_random_vector(global_size, global_seed); Z scale = 2; - double m1 = scale*scale*boost::math::tools::variance(v); + double m1 = scale*scale*boost::math::statistics::variance(v); for (auto & x : v) { x *= scale; } - double m2 = boost::math::tools::variance(v); + double m2 = boost::math::statistics::variance(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } @@ -303,32 +303,32 @@ void test_integer_skewness() { double tol = std::numeric_limits::epsilon(); std::vector v{1,1,1}; - double skew = boost::math::tools::skewness(v); + double skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew) < tol); // Dataset is symmetric about the mean: v = {1,2,3,4,5}; - skew = boost::math::tools::skewness(v); + skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew) < tol); v = {0,0,0,0,5}; // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 - skew = boost::math::tools::skewness(v); + skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew - 3.0/2.0) < tol); std::forward_list v2{0,0,0,0,5}; - skew = boost::math::tools::skewness(v); + skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew - 3.0/2.0) < tol); v = generate_random_vector(global_size, global_seed); Z scale = 2; - double m1 = boost::math::tools::skewness(v); + double m1 = boost::math::statistics::skewness(v); for (auto & x : v) { x *= scale; } - double m2 = boost::math::tools::skewness(v); + double m2 = boost::math::statistics::skewness(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } @@ -338,35 +338,35 @@ void test_skewness() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1,1}; - Real skew = boost::math::tools::skewness(v); + Real skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew) < tol); // Dataset is symmetric about the mean: v = {1,2,3,4,5}; - skew = boost::math::tools::skewness(v); + skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew) < tol); v = {0,0,0,0,5}; // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2 - skew = boost::math::tools::skewness(v); + skew = boost::math::statistics::skewness(v); BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); std::array w1{0,0,0,0,5}; - skew = boost::math::tools::skewness(w1); + skew = boost::math::statistics::skewness(w1); BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); std::forward_list w2{0,0,0,0,5}; - skew = boost::math::tools::skewness(w2); + skew = boost::math::statistics::skewness(w2); BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol); v = generate_random_vector(global_size, global_seed); Real scale = 2; - Real m1 = boost::math::tools::skewness(v); + Real m1 = boost::math::statistics::skewness(v); for (auto & x : v) { x *= scale; } - Real m2 = boost::math::tools::skewness(v); + Real m2 = boost::math::statistics::skewness(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } @@ -375,25 +375,25 @@ void test_kurtosis() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,1,1}; - Real kurt = boost::math::tools::kurtosis(v); + Real kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt) < tol); v = {1,2,3,4,5}; // mu =1, sigma^2 = 2, kurtosis = 17/10 - kurt = boost::math::tools::kurtosis(v); + kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt - Real(17)/Real(10)) < tol); v = {0,0,0,0,5}; // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 - kurt = boost::math::tools::kurtosis(v); + kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); std::array v1{0,0,0,0,5}; - kurt = boost::math::tools::kurtosis(v1); + kurt = boost::math::statistics::kurtosis(v1); BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); std::forward_list v2{0,0,0,0,5}; - kurt = boost::math::tools::kurtosis(v2); + kurt = boost::math::statistics::kurtosis(v2); BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol); std::vector v3(10000); @@ -402,24 +402,24 @@ void test_kurtosis() for (size_t i = 0; i < v3.size(); ++i) { v3[i] = dis(gen); } - kurt = boost::math::tools::kurtosis(v3); + kurt = boost::math::statistics::kurtosis(v3); BOOST_TEST(abs(kurt - 3) < 0.1); std::uniform_real_distribution udis(-1, 3); for (size_t i = 0; i < v3.size(); ++i) { v3[i] = udis(gen); } - auto excess_kurtosis = boost::math::tools::excess_kurtosis(v3); + auto excess_kurtosis = boost::math::statistics::excess_kurtosis(v3); BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2); v = generate_random_vector(global_size, global_seed); Real scale = 2; - Real m1 = boost::math::tools::kurtosis(v); + Real m1 = boost::math::statistics::kurtosis(v); for (auto & x : v) { x *= scale; } - Real m2 = boost::math::tools::kurtosis(v); + Real m2 = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); // This test only passes when there are a large number of samples. @@ -432,7 +432,7 @@ void test_kurtosis() //for (size_t i = 0; i < v3.size(); ++i) { // v3[i] = edis(gen); //} - //excess_kurtosis = boost::math::tools::kurtosis(v3) - 3; + //excess_kurtosis = boost::math::statistics::kurtosis(v3) - 3; //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2); } @@ -441,27 +441,27 @@ void test_integer_kurtosis() { double tol = std::numeric_limits::epsilon(); std::vector v{1,1,1}; - double kurt = boost::math::tools::kurtosis(v); + double kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt) < tol); v = {1,2,3,4,5}; // mu =1, sigma^2 = 2, kurtosis = 17/10 - kurt = boost::math::tools::kurtosis(v); + kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt - 17.0/10.0) < tol); v = {0,0,0,0,5}; // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4 - kurt = boost::math::tools::kurtosis(v); + kurt = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(kurt - 13.0/4.0) < tol); v = generate_random_vector(global_size, global_seed); Z scale = 2; - double m1 = boost::math::tools::kurtosis(v); + double m1 = boost::math::statistics::kurtosis(v); for (auto & x : v) { x *= scale; } - double m2 = boost::math::tools::kurtosis(v); + double m2 = boost::math::statistics::kurtosis(v); BOOST_TEST(abs(m1 - m2) < tol*abs(m1)); } @@ -470,14 +470,14 @@ void test_first_four_moments() { Real tol = 10*std::numeric_limits::epsilon(); std::vector v{1,1,1}; - auto [M1_1, M2_1, M3_1, M4_1] = boost::math::tools::first_four_moments(v); + auto [M1_1, M2_1, M3_1, M4_1] = boost::math::statistics::first_four_moments(v); BOOST_TEST(abs(M1_1 - 1) < tol); BOOST_TEST(abs(M2_1) < tol); BOOST_TEST(abs(M3_1) < tol); BOOST_TEST(abs(M4_1) < tol); v = {1, 2, 3, 4, 5}; - auto [M1_2, M2_2, M3_2, M4_2] = boost::math::tools::first_four_moments(v); + auto [M1_2, M2_2, M3_2, M4_2] = boost::math::statistics::first_four_moments(v); BOOST_TEST(abs(M1_2 - 3) < tol); BOOST_TEST(abs(M2_2 - 2) < tol); BOOST_TEST(abs(M3_2) < tol); @@ -490,47 +490,47 @@ void test_median() std::mt19937 g(12); std::vector v{1,2,3,4,5,6,7}; - Real m = boost::math::tools::median(v.begin(), v.end()); + Real m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 4); std::shuffle(v.begin(), v.end(), g); // Does range call work? - m = boost::math::tools::median(v); + m = boost::math::statistics::median(v); BOOST_TEST_EQ(m, 4); v = {1,2,3,3,4,5}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 3); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 3); v = {1}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 1); v = {1,1}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 1); v = {2,4}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 3); v = {1,1,1}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 1); v = {1,2,3}; - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 2); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median(v.begin(), v.end()); + m = boost::math::statistics::median(v.begin(), v.end()); BOOST_TEST_EQ(m, 2); // Does it work with std::array? std::array w{1,2,3}; - m = boost::math::tools::median(w); + m = boost::math::statistics::median(w); BOOST_TEST_EQ(m, 2); // Does it work with ublas? @@ -538,7 +538,7 @@ void test_median() w1[0] = 1; w1[1] = 2; w1[2] = 3; - m = boost::math::tools::median(w); + m = boost::math::statistics::median(w); BOOST_TEST_EQ(m, 2); } @@ -547,53 +547,53 @@ void test_median_absolute_deviation() { std::vector v{-1, 2, -3, 4, -5, 6, -7}; - Real m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + Real m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 4); std::mt19937 g(12); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median_absolute_deviation(v, 0); + m = boost::math::statistics::median_absolute_deviation(v, 0); BOOST_TEST_EQ(m, 4); v = {1, -2, -3, 3, -4, -5}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 3); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 3); v = {-1}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 1); v = {-1, 1}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 1); // The median is zero, so coincides with the default: - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end()); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end()); BOOST_TEST_EQ(m, 1); - m = boost::math::tools::median_absolute_deviation(v); + m = boost::math::statistics::median_absolute_deviation(v); BOOST_TEST_EQ(m, 1); v = {2, -4}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 3); v = {1, -1, 1}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 1); v = {1, 2, -3}; - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 2); std::shuffle(v.begin(), v.end(), g); - m = boost::math::tools::median_absolute_deviation(v.begin(), v.end(), 0); + m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0); BOOST_TEST_EQ(m, 2); std::array w{1, 2, -3}; - m = boost::math::tools::median_absolute_deviation(w, 0); + m = boost::math::statistics::median_absolute_deviation(w, 0); BOOST_TEST_EQ(m, 2); // boost.ublas vector? @@ -604,7 +604,7 @@ void test_median_absolute_deviation() u[3] = 1; u[4] = 2; u[5] = -3; - m = boost::math::tools::median_absolute_deviation(u, 0); + m = boost::math::statistics::median_absolute_deviation(u, 0); BOOST_TEST_EQ(m, 2); } @@ -614,26 +614,26 @@ void test_sample_gini_coefficient() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,0,0}; - Real gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + Real gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini - 1) < tol); - gini = boost::math::tools::sample_gini_coefficient(v); + gini = boost::math::statistics::sample_gini_coefficient(v); BOOST_TEST(abs(gini - 1) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); v[0] = 0; v[1] = 0; v[2] = 0; - gini = boost::math::tools::sample_gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); std::array w{0,0,0}; - gini = boost::math::tools::sample_gini_coefficient(w); + gini = boost::math::statistics::sample_gini_coefficient(w); BOOST_TEST(abs(gini) < tol); } @@ -643,34 +643,34 @@ void test_gini_coefficient() { Real tol = std::numeric_limits::epsilon(); std::vector v{1,0,0}; - Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + Real gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); Real expected = Real(2)/Real(3); BOOST_TEST(abs(gini - expected) < tol); - gini = boost::math::tools::gini_coefficient(v); + gini = boost::math::statistics::gini_coefficient(v); BOOST_TEST(abs(gini - expected) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); v[0] = 0; v[1] = 0; v[2] = 0; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); std::array w{0,0,0}; - gini = boost::math::tools::gini_coefficient(w); + gini = boost::math::statistics::gini_coefficient(w); BOOST_TEST(abs(gini) < tol); boost::numeric::ublas::vector w1(3); w1[0] = 1; w1[1] = 1; w1[2] = 1; - gini = boost::math::tools::gini_coefficient(w1); + gini = boost::math::statistics::gini_coefficient(w1); BOOST_TEST(abs(gini) < tol); std::mt19937 gen(18); @@ -682,7 +682,7 @@ void test_gini_coefficient() { v[i] = dis(gen); } - gini = boost::math::tools::gini_coefficient(v); + gini = boost::math::statistics::gini_coefficient(v); BOOST_TEST(abs(gini - expected) < 0.01); } @@ -692,34 +692,34 @@ void test_integer_gini_coefficient() { double tol = std::numeric_limits::epsilon(); std::vector v{1,0,0}; - double gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + double gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); double expected = 2.0/3.0; BOOST_TEST(abs(gini - expected) < tol); - gini = boost::math::tools::gini_coefficient(v); + gini = boost::math::statistics::gini_coefficient(v); BOOST_TEST(abs(gini - expected) < tol); v[0] = 1; v[1] = 1; v[2] = 1; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); v[0] = 0; v[1] = 0; v[2] = 0; - gini = boost::math::tools::gini_coefficient(v.begin(), v.end()); + gini = boost::math::statistics::gini_coefficient(v.begin(), v.end()); BOOST_TEST(abs(gini) < tol); std::array w{0,0,0}; - gini = boost::math::tools::gini_coefficient(w); + gini = boost::math::statistics::gini_coefficient(w); BOOST_TEST(abs(gini) < tol); boost::numeric::ublas::vector w1(3); w1[0] = 1; w1[1] = 1; w1[2] = 1; - gini = boost::math::tools::gini_coefficient(w1); + gini = boost::math::statistics::gini_coefficient(w1); BOOST_TEST(abs(gini) < tol); }