mirror of
https://github.com/boostorg/math.git
synced 2026-01-19 04:22:09 +00:00
Split descriptive_statistics.hpp into univariate_statistics.hpp and a currently-hypothetical bivariate_statistics.hpp [CI SKIP]
This commit is contained in:
@@ -553,7 +553,8 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22.
|
||||
[endmathpart] [/section:dist Statistical Distributions and Functions]
|
||||
|
||||
[mathpart vector_functionals Vector Functionals]
|
||||
[include vector_functionals/descriptive_statistics.qbk]
|
||||
[include vector_functionals/univariate_statistics.qbk]
|
||||
[include vector_functionals/signal_statistics.qbk]
|
||||
[include vector_functionals/norms.qbk]
|
||||
[endmathpart] [/section:vector_functionals Vector Functionals]
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ The \u2113[super 2] norm is again a special case of the \u2113[super /p/] norm,
|
||||
double l1 = boost::math::tools::l2_norm(v.begin(), v.end());
|
||||
// l1 = sqrt(3)
|
||||
|
||||
Requires a forward iterable input, does not modify input data, and works with complex numbers.
|
||||
Requires a forward iterable input, does not modify input data, and works with real and complex numbers.
|
||||
|
||||
[heading Total Variation]
|
||||
|
||||
@@ -135,11 +135,14 @@ Requires a forward iterable input, does not modify input data, and works with co
|
||||
std::vector<int> v{1,1,1};
|
||||
int tv = boost::math::tools::total_variation(v);
|
||||
|
||||
The total variation only supports real numbers and /signed/ integers.
|
||||
The total variation only supports real numbers and integers.
|
||||
All the constituent operations to compute the total variation are well-defined for complex numbers,
|
||||
but the computed result is not meaningful; a 2D total variation is more appropriate.
|
||||
The container must be forward iterable, and the contents are not modified.
|
||||
|
||||
As an aside, the total variation is not technically a norm, since /TV(v) = 0/ does not imply /v = 0/.
|
||||
However, it satisfies the triangle inequality and is absolutely 1-homogeneous, so it is a seminorm, and hence is grouped with the other norms here.
|
||||
|
||||
[heading References]
|
||||
|
||||
* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002.
|
||||
|
||||
144
doc/vector_functionals/signal_statistics.qbk
Normal file
144
doc/vector_functionals/signal_statistics.qbk
Normal file
@@ -0,0 +1,144 @@
|
||||
[/
|
||||
Copyright 2018 Nick Thompson
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0.
|
||||
(See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt).
|
||||
]
|
||||
|
||||
[section:signal_statistics Signal Statistics]
|
||||
|
||||
[heading Synopsis]
|
||||
|
||||
``
|
||||
#include <boost/math/tools/signal_statistics.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
template<class Container>
|
||||
auto absolute_median(Container & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto absolute_median(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto absolute_gini_coefficient(Container & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto hoyer_sparsity(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto hoyer_sparsity(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto shannon_entropy(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto shannon_entropy(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto shannon_cost(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto shannon_cost(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
|
||||
}}}
|
||||
``
|
||||
|
||||
[heading Description]
|
||||
|
||||
The file `boost/math/tools/signal_statistics.hpp` is a set of facilities for computing quantities commonly used in signal analysis.
|
||||
|
||||
Our examples use `std::vector<double>` to hold the data, but this not required.
|
||||
In general, you can store your data in an Eigen array, and Armadillo vector, `std::array`, and for many of the routines, a `std::forward_list`.
|
||||
These routines are usable in float, double, long double, and Boost.Multiprecision precision, as well as their complex extensions whenever the computation is well-defined.
|
||||
For certain operations (total variation, for example) integer inputs are supported.
|
||||
|
||||
[heading Absolute Median]
|
||||
|
||||
The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance.
|
||||
See [@https://wavelet-tour.github.io/ Mallat] for details.
|
||||
The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators.
|
||||
|
||||
std::vector<double> v{-1, 1};
|
||||
double m = boost::math::tools::absolute_median(v.begin(), v.end());
|
||||
// m = 1
|
||||
|
||||
[heading Absolute Gini Coefficient]
|
||||
|
||||
The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis.
|
||||
A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious.
|
||||
However, for measuring sparsity, the phase of the numbers is irrelevant, so we provide the `absolute_gini_coefficient`:
|
||||
|
||||
std::vector<std::complex<double>> v{{0,1}, {0,0}, {0,0}, {0,0}};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end());
|
||||
// now abs_gini = 1
|
||||
|
||||
std::vector<std::complex<double>> w{{0,1}, {1,0}, {0,-1}, {-1,0}};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end());
|
||||
// now abs_gini = 0
|
||||
|
||||
std::vector<double> u{-1, 1, -1};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end());
|
||||
// now abs_gini = 0
|
||||
|
||||
Wikipedia calls our scaling a "sample Gini coefficient".
|
||||
We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient,
|
||||
whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input.
|
||||
|
||||
If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?),
|
||||
consider calculating the Hoyer sparsity instead.
|
||||
|
||||
[heading Hoyer Sparsity]
|
||||
|
||||
The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms.
|
||||
As the name suggests, it is used to measure sparsity in an expansion in some basis.
|
||||
|
||||
The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1).
|
||||
For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard].
|
||||
|
||||
Usage:
|
||||
|
||||
std::vector<Real> v{1,0,0};
|
||||
Real hs = boost::math::tools::hoyer_sparsity(v);
|
||||
// hs = 1
|
||||
std::vector<Real> v{1,-1,1};
|
||||
Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end());
|
||||
// hs = 0
|
||||
|
||||
The container must be forward iterable and the contents are not modified.
|
||||
Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float.
|
||||
|
||||
[heading Shannon Entropy]
|
||||
|
||||
std::vector<double> v{1/2.0, 1/2.0};
|
||||
double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end());
|
||||
// Hs = ln(2).
|
||||
|
||||
The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced.
|
||||
The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base.
|
||||
|
||||
[heading Shannon Cost]
|
||||
|
||||
std::vector<double> v{-1, 1,-1};
|
||||
double Ks = boost::math::tools::shannon_cost(v.begin(), v.end());
|
||||
// Ks = 0; concentration of the vector is minimized.
|
||||
|
||||
The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression.
|
||||
The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295/w/) = /K/[sub /s/](v) + /K/[sub /s/](w).
|
||||
See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details.
|
||||
|
||||
|
||||
[heading References]
|
||||
|
||||
* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002.
|
||||
* Mallat, Stephane. ['A wavelet tour of signal processing: the sparse way.] Academic press, 2008.
|
||||
* Hurley, Niall, and Scott Rickard. ['Comparing measures of sparsity.] IEEE Transactions on Information Theory 55.10 (2009): 4723-4741.
|
||||
* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001.
|
||||
|
||||
[endsect]
|
||||
[/section:signal_statistics Signal Statistics]
|
||||
@@ -1,17 +1,17 @@
|
||||
[/
|
||||
Copyright 2017 Nick Thompson
|
||||
Copyright 2018 Nick Thompson
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0.
|
||||
(See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt).
|
||||
]
|
||||
|
||||
[section:descriptive_statistics Descriptive Statistics]
|
||||
[section:univariate_statistics Univariate Statistics]
|
||||
|
||||
[heading Synopsis]
|
||||
|
||||
``
|
||||
#include <boost/math/tools/descriptive_statistics.hpp>
|
||||
#include <boost/math/tools/univariate_statistics.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
@@ -33,55 +33,24 @@ namespace boost{ namespace math{ namespace tools {
|
||||
template<class ForwardIterator>
|
||||
auto median(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto absolute_median(Container & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto absolute_median(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto gini_coefficient(Container & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto gini_coefficient(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto absolute_gini_coefficient(Container & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto hoyer_sparsity(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto hoyer_sparsity(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto shannon_entropy(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto shannon_entropy(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class Container>
|
||||
auto shannon_cost(Container const & c);
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto shannon_cost(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
|
||||
}}}
|
||||
``
|
||||
|
||||
[heading Description]
|
||||
|
||||
The file `boost/math/tools/descriptive_statistics.hpp` is a set of facilities for computing scalar values from vectors.
|
||||
The file `boost/math/tools/univariate_statistics.hpp` is a set of facilities for computing scalar values from vectors.
|
||||
|
||||
Many of these functionals have trivial naive implementations, but experienced programmers will recognize that even trivial algorithms are easy to screw up, and that numerical instabilities often lurk in corner cases.
|
||||
We have attempted to do our "due diligence" to root out these problems-scouring the literature for numerically stable algorithms for even the simplest of functionals.
|
||||
|
||||
/Nota bene/: Some similar functionality is provided in [@https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html Boost Accumulators Framework].
|
||||
These accumulators should be used in real-time applications; `descriptive_statistics.hpp` should be used when CPU vectorization is needed.
|
||||
These accumulators should be used in real-time applications; `univariate_statistics.hpp` should be used when CPU vectorization is needed.
|
||||
As a reminder, remember that to actually /get/ vectorization, compile with `-march=native -O3` flags.
|
||||
|
||||
We now describe each functional in detail.
|
||||
@@ -131,15 +100,6 @@ Compute the median of a dataset:
|
||||
The calculation of the median is a thin wrapper around the C++11 [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`].
|
||||
Therefore, all requirements of `std::nth_element` are inherited by the median calculation.
|
||||
|
||||
[heading Absolute Median]
|
||||
|
||||
The absolute median is used in signal processing, where the median of the magnitude of the coefficients in some expansion are used to estimate noise variance.
|
||||
See [@https://wavelet-tour.github.io/ Mallat] for details.
|
||||
The absolute median supports both real and complex arithmetic, modifies its input, and requires random access iterators.
|
||||
|
||||
std::vector<double> v{-1, 1};
|
||||
double m = boost::math::tools::absolute_median(v.begin(), v.end());
|
||||
// m = 1
|
||||
|
||||
[heading Gini Coefficient]
|
||||
|
||||
@@ -163,69 +123,6 @@ If you wish to convert the Boost Gini coefficient to the population Gini coeffic
|
||||
However, a single use case (measuring wealth inequality when some people have negative wealth) exists, so we do not throw an exception when negative values are encountered.
|
||||
You should have /very/ good cause to pass negative values to the Gini coefficient calculator.
|
||||
|
||||
The Gini coefficient, first used to measure wealth inequality, is also one of the best measures of the sparsity of an expansion in a basis.
|
||||
A sparse expansion has most of its norm concentrated in just a few coefficients, making the connection with wealth inequality obvious.
|
||||
However, for measuring sparsity, the phase of the numbers is irrelevant, so `absolute_gini_coefficient` should be used instead:
|
||||
|
||||
std::vector<std::complex<double>> v{{0,1}, {0,0}, {0,0}, {0,0}};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(v.begin(), v.end());
|
||||
// now abs_gini = 1
|
||||
|
||||
std::vector<std::complex<double>> w{{0,1}, {1,0}, {0,-1}, {-1,0}};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(w.begin(), w.end());
|
||||
// now abs_gini = 0
|
||||
|
||||
std::vector<double> u{-1, 1, -1};
|
||||
double abs_gini = boost::math::tools::absolute_gini_coefficient(u.begin(), u.end());
|
||||
// now abs_gini = 0
|
||||
|
||||
Again, Wikipedia denotes our scaling as a "sample Gini coefficient".
|
||||
We chose this scaling because it always returns unity for a vector which has only one nonzero coefficient,
|
||||
whereas the value of the population Gini coefficient of a vector with one non-zero element is dependent on the length of the input.
|
||||
|
||||
If sorting the input data is too much expense for a sparsity measure (is it going to be perfect anyway?),
|
||||
consider calculating the Hoyer sparsity instead.
|
||||
|
||||
[heading Hoyer Sparsity]
|
||||
|
||||
The Hoyer sparsity measures a normalized ratio of the \u2113[super 1] and \u2113[super 2] norms.
|
||||
As the name suggests, it is used to measure sparsity in an expansion in some basis.
|
||||
|
||||
The Hoyer sparsity computes ([radic]/N/ - \u2113[super 1](v)/\u2113[super 2](v))/([radic]N -1).
|
||||
For details, see [@https://arxiv.org/pdf/0811.4706.pdf Hurley and Rickard].
|
||||
|
||||
Usage:
|
||||
|
||||
std::vector<Real> v{1,0,0};
|
||||
Real hs = boost::math::tools::hoyer_sparsity(v);
|
||||
// hs = 1
|
||||
std::vector<Real> v{1,-1,1};
|
||||
Real hs = boost::math::tools::hoyer_sparsity(v.begin(), v.end());
|
||||
// hs = 0
|
||||
|
||||
The container must be forward iterable and the contents are not modified.
|
||||
Accepts real, complex, and integer inputs. If the input is an integral type, the output is a double precision float.
|
||||
|
||||
[heading Shannon Entropy]
|
||||
|
||||
std::vector<double> v{1/2.0, 1/2.0};
|
||||
double Hs = boost::math::tools::shannon_entropy(v.begin(), v.end());
|
||||
// Hs = ln(2).
|
||||
|
||||
The Shannon entropy only supports non-negative real-valued inputs, presumably for interpretational purposes in the range [0,1]-though this is not enforced.
|
||||
The natural logarithm is used to compute the Shannon entropy; all other "Shannon entropies" are readily obtained by change of log base.
|
||||
|
||||
[heading Shannon Cost]
|
||||
|
||||
std::vector<double> v{-1, 1,-1};
|
||||
double Ks = boost::math::tools::shannon_cost(v.begin(), v.end());
|
||||
// Ks = 0; concentration of the vector is minimized.
|
||||
|
||||
The Shannon cost is a modified version of the Shannon entropy used in signal processing and data compression.
|
||||
The useful properties of the Shannon cost are /K/[sub /s/](0) = 0 and /K/[sub /s/](/v/\u2295 /w/) = /K/[sub /s/](v) + /K/[sub /s/](w).
|
||||
See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for details.
|
||||
|
||||
|
||||
[heading References]
|
||||
|
||||
* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002.
|
||||
@@ -234,4 +131,4 @@ See [@https://doi.org/10.1007/978-3-642-56702-5 Ripples in Mathematics] for deta
|
||||
* Jensen, Arne, and Anders la Cour-Harbo. ['Ripples in mathematics: the discrete wavelet transform.] Springer Science & Business Media, 2001.
|
||||
|
||||
[endsect]
|
||||
[/section:descriptive_statistics Descriptive Statistics]
|
||||
[/section:univariate_statistics Univariate Statistics]
|
||||
@@ -12,10 +12,6 @@
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/multiprecision/detail/number_base.hpp>
|
||||
|
||||
/*
|
||||
* A set of tools for computing scalar quantities associated with lists of numbers.
|
||||
*/
|
||||
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
@@ -29,12 +25,32 @@ auto total_variation(ForwardIterator first, ForwardIterator last)
|
||||
Real tv = 0;
|
||||
auto it = first;
|
||||
Real tmp = *it;
|
||||
while (++it != last)
|
||||
|
||||
if constexpr (std::is_unsigned<Real>::value)
|
||||
{
|
||||
tv += abs(*it - tmp);
|
||||
tmp = *it;
|
||||
while (++it != last)
|
||||
{
|
||||
if (*it > tmp)
|
||||
{
|
||||
tv += *it - tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
tv += tmp - *it;
|
||||
}
|
||||
tmp = *it;
|
||||
}
|
||||
return tv;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (++it != last)
|
||||
{
|
||||
tv += abs(*it - tmp);
|
||||
tmp = *it;
|
||||
}
|
||||
return tv;
|
||||
}
|
||||
return tv;
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP
|
||||
#define BOOST_MATH_TOOLS_DESCRIPTIVE_STATISTICS_HPP
|
||||
#ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP
|
||||
#define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
@@ -12,119 +12,9 @@
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/multiprecision/detail/number_base.hpp>
|
||||
|
||||
/*
|
||||
* A set of tools for computing scalar quantities associated with lists of numbers.
|
||||
*/
|
||||
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto
|
||||
mean(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
|
||||
if constexpr (std::is_integral<Real>::value)
|
||||
{
|
||||
double mu = 0;
|
||||
double i = 1;
|
||||
for(auto it = first; it != last; ++it) {
|
||||
mu = mu + (*it - mu)/i;
|
||||
i += 1;
|
||||
}
|
||||
return mu;
|
||||
}
|
||||
else
|
||||
{
|
||||
Real mu = 0;
|
||||
Real i = 1;
|
||||
for(auto it = first; it != last; ++it) {
|
||||
mu = mu + (*it - mu)/i;
|
||||
i += 1;
|
||||
}
|
||||
return mu;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
inline auto mean(Container const & v)
|
||||
{
|
||||
return mean(v.cbegin(), v.cend());
|
||||
}
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto
|
||||
mean_and_population_variance(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
|
||||
// Higham, Accuracy and Stability, equation 1.6a and 1.6b:
|
||||
if constexpr (std::is_integral<Real>::value)
|
||||
{
|
||||
double M = *first;
|
||||
double Q = 0;
|
||||
double k = 2;
|
||||
for (auto it = first + 1; it != last; ++it)
|
||||
{
|
||||
double tmp = *it - M;
|
||||
Q = Q + ((k-1)*tmp*tmp)/k;
|
||||
M = M + tmp/k;
|
||||
k += 1;
|
||||
}
|
||||
return std::make_pair(M, Q/(k-1));
|
||||
}
|
||||
else
|
||||
{
|
||||
Real M = *first;
|
||||
Real Q = 0;
|
||||
Real k = 2;
|
||||
for (auto it = first + 1; it != last; ++it)
|
||||
{
|
||||
Real tmp = *it - M;
|
||||
Q = Q + ((k-1)*tmp*tmp)/k;
|
||||
M = M + tmp/k;
|
||||
k += 1;
|
||||
}
|
||||
|
||||
return std::make_pair(M, Q/(k-1));
|
||||
}
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
inline auto mean_and_population_variance(Container const & v)
|
||||
{
|
||||
return mean_and_population_variance(v.cbegin(), v.cend());
|
||||
}
|
||||
|
||||
template<class RandomAccessIterator>
|
||||
auto median(RandomAccessIterator first, RandomAccessIterator last)
|
||||
{
|
||||
size_t num_elems = std::distance(first, last);
|
||||
BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
|
||||
if (num_elems & 1)
|
||||
{
|
||||
auto middle = first + (num_elems - 1)/2;
|
||||
std::nth_element(first, middle, last);
|
||||
return *middle;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto middle = first + num_elems/2 - 1;
|
||||
std::nth_element(first, middle, last);
|
||||
std::nth_element(middle, middle+1, last);
|
||||
return (*middle + *(middle+1))/2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class RandomAccessContainer>
|
||||
inline auto median(RandomAccessContainer & v)
|
||||
{
|
||||
return median(v.begin(), v.end());
|
||||
}
|
||||
|
||||
|
||||
template<class RandomAccessIterator>
|
||||
auto absolute_median(RandomAccessIterator first, RandomAccessIterator last)
|
||||
{
|
||||
@@ -202,40 +92,6 @@ inline auto shannon_cost(Container const & v)
|
||||
}
|
||||
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto gini_coefficient(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
|
||||
|
||||
std::sort(first, last);
|
||||
|
||||
Real i = 1;
|
||||
Real num = 0;
|
||||
Real denom = 0;
|
||||
for (auto it = first; it != last; ++it)
|
||||
{
|
||||
num += *it*i;
|
||||
denom += *it;
|
||||
++i;
|
||||
}
|
||||
|
||||
// If the l1 norm is zero, all elements are zero, so every element is the same.
|
||||
if (denom == 0)
|
||||
{
|
||||
return Real(0);
|
||||
}
|
||||
|
||||
return ((2*num)/denom - i)/(i-2);
|
||||
}
|
||||
|
||||
template<class RandomAccessContainer>
|
||||
inline auto gini_coefficient(RandomAccessContainer & v)
|
||||
{
|
||||
return gini_coefficient(v.begin(), v.end());
|
||||
}
|
||||
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
158
include/boost/math/tools/univariate_statistics.hpp
Normal file
158
include/boost/math/tools/univariate_statistics.hpp
Normal file
@@ -0,0 +1,158 @@
|
||||
// (C) Copyright Nick Thompson 2018.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
|
||||
#define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <boost/type_traits.hpp>
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/multiprecision/detail/number_base.hpp>
|
||||
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto
|
||||
mean(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
|
||||
if constexpr (std::is_integral<Real>::value)
|
||||
{
|
||||
double mu = 0;
|
||||
double i = 1;
|
||||
for(auto it = first; it != last; ++it) {
|
||||
mu = mu + (*it - mu)/i;
|
||||
i += 1;
|
||||
}
|
||||
return mu;
|
||||
}
|
||||
else
|
||||
{
|
||||
Real mu = 0;
|
||||
Real i = 1;
|
||||
for(auto it = first; it != last; ++it) {
|
||||
mu = mu + (*it - mu)/i;
|
||||
i += 1;
|
||||
}
|
||||
return mu;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
inline auto mean(Container const & v)
|
||||
{
|
||||
return mean(v.cbegin(), v.cend());
|
||||
}
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto
|
||||
mean_and_population_variance(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
|
||||
// Higham, Accuracy and Stability, equation 1.6a and 1.6b:
|
||||
if constexpr (std::is_integral<Real>::value)
|
||||
{
|
||||
double M = *first;
|
||||
double Q = 0;
|
||||
double k = 2;
|
||||
for (auto it = first + 1; it != last; ++it)
|
||||
{
|
||||
double tmp = *it - M;
|
||||
Q = Q + ((k-1)*tmp*tmp)/k;
|
||||
M = M + tmp/k;
|
||||
k += 1;
|
||||
}
|
||||
return std::make_pair(M, Q/(k-1));
|
||||
}
|
||||
else
|
||||
{
|
||||
Real M = *first;
|
||||
Real Q = 0;
|
||||
Real k = 2;
|
||||
for (auto it = first + 1; it != last; ++it)
|
||||
{
|
||||
Real tmp = *it - M;
|
||||
Q = Q + ((k-1)*tmp*tmp)/k;
|
||||
M = M + tmp/k;
|
||||
k += 1;
|
||||
}
|
||||
|
||||
return std::make_pair(M, Q/(k-1));
|
||||
}
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
inline auto mean_and_population_variance(Container const & v)
|
||||
{
|
||||
return mean_and_population_variance(v.cbegin(), v.cend());
|
||||
}
|
||||
|
||||
template<class RandomAccessIterator>
|
||||
auto median(RandomAccessIterator first, RandomAccessIterator last)
|
||||
{
|
||||
size_t num_elems = std::distance(first, last);
|
||||
BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
|
||||
if (num_elems & 1)
|
||||
{
|
||||
auto middle = first + (num_elems - 1)/2;
|
||||
std::nth_element(first, middle, last);
|
||||
return *middle;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto middle = first + num_elems/2 - 1;
|
||||
std::nth_element(first, middle, last);
|
||||
std::nth_element(middle, middle+1, last);
|
||||
return (*middle + *(middle+1))/2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class RandomAccessContainer>
|
||||
inline auto median(RandomAccessContainer & v)
|
||||
{
|
||||
return median(v.begin(), v.end());
|
||||
}
|
||||
|
||||
|
||||
template<class ForwardIterator>
|
||||
auto gini_coefficient(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
using Real = typename std::iterator_traits<ForwardIterator>::value_type;
|
||||
BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
|
||||
|
||||
std::sort(first, last);
|
||||
|
||||
Real i = 1;
|
||||
Real num = 0;
|
||||
Real denom = 0;
|
||||
for (auto it = first; it != last; ++it)
|
||||
{
|
||||
num += *it*i;
|
||||
denom += *it;
|
||||
++i;
|
||||
}
|
||||
|
||||
// If the l1 norm is zero, all elements are zero, so every element is the same.
|
||||
if (denom == 0)
|
||||
{
|
||||
return Real(0);
|
||||
}
|
||||
|
||||
return ((2*num)/denom - i)/(i-2);
|
||||
}
|
||||
|
||||
template<class RandomAccessContainer>
|
||||
inline auto gini_coefficient(RandomAccessContainer & v)
|
||||
{
|
||||
return gini_coefficient(v.begin(), v.end());
|
||||
}
|
||||
|
||||
}}}
|
||||
#endif
|
||||
@@ -902,8 +902,9 @@ test-suite misc :
|
||||
[ run test_constant_generate.cpp : : : release <define>USE_CPP_FLOAT=1 <exception-handling>off:<build>no ]
|
||||
[ run test_cubic_b_spline.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj release ]
|
||||
[ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] # does not in fact require C++17 constexpr; requires C++17 std::size.
|
||||
[ run descriptive_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ]
|
||||
[ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ]
|
||||
[ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ]
|
||||
|
||||
@@ -109,6 +109,12 @@ void test_integer_total_variation()
|
||||
std::array<Z, 2> w{1,1};
|
||||
tv = boost::math::tools::total_variation(w);
|
||||
BOOST_TEST_EQ(tv,0);
|
||||
|
||||
// Work with both signed and unsigned integers?
|
||||
std::array<Z, 4> u{1, 2, 1, 2};
|
||||
tv = boost::math::tools::total_variation(u);
|
||||
BOOST_TEST_EQ(tv, 3);
|
||||
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
@@ -335,6 +341,11 @@ int main()
|
||||
|
||||
test_integer_l1_norm<int>();
|
||||
|
||||
test_complex_l1_norm<std::complex<float>>();
|
||||
test_complex_l1_norm<std::complex<double>>();
|
||||
test_complex_l1_norm<std::complex<long double>>();
|
||||
test_complex_l1_norm<cpp_complex_50>();
|
||||
|
||||
test_complex_l2_norm<std::complex<float>>();
|
||||
test_complex_l2_norm<std::complex<double>>();
|
||||
test_complex_l2_norm<std::complex<long double>>();
|
||||
@@ -345,16 +356,12 @@ int main()
|
||||
test_l2_norm<long double>();
|
||||
test_l2_norm<cpp_bin_float_50>();
|
||||
|
||||
test_complex_l1_norm<std::complex<float>>();
|
||||
test_complex_l1_norm<std::complex<double>>();
|
||||
test_complex_l1_norm<std::complex<long double>>();
|
||||
test_complex_l1_norm<cpp_complex_50>();
|
||||
|
||||
test_total_variation<float>();
|
||||
test_total_variation<double>();
|
||||
test_total_variation<long double>();
|
||||
test_total_variation<cpp_bin_float_50>();
|
||||
|
||||
test_integer_total_variation<uint32_t>();
|
||||
test_integer_total_variation<int>();
|
||||
|
||||
return boost::report_errors();
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
#include <boost/math/constants/constants.hpp>
|
||||
#include <boost/math/tools/descriptive_statistics.hpp>
|
||||
#include <boost/math/tools/signal_statistics.hpp>
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/multiprecision/cpp_complex.hpp>
|
||||
|
||||
@@ -29,162 +29,6 @@ using boost::multiprecision::cpp_complex_50;
|
||||
* 5) Does it work with complex data if complex data is sensible?
|
||||
*/
|
||||
|
||||
template<class Z>
|
||||
void test_integer_mean()
|
||||
{
|
||||
double tol = std::numeric_limits<double>::epsilon();
|
||||
std::vector<Z> v{1,2,3,4,5};
|
||||
double mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Work with std::array?
|
||||
std::array<Z, 5> w{1,2,3,4,5};
|
||||
mu = boost::math::tools::mean(w);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_mean()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,2,3,4,5};
|
||||
Real mu = boost::math::tools::mean(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Does range call work?
|
||||
mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Can we successfully average only part of the vector?
|
||||
mu = boost::math::tools::mean(v.begin(), v.begin() + 3);
|
||||
BOOST_TEST(abs(mu - 2) < tol);
|
||||
|
||||
// Does it work when we const qualify?
|
||||
mu = boost::math::tools::mean(v.cbegin(), v.cend());
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Does it work for std::array?
|
||||
std::array<Real, 7> u{1,2,3,4,5,6,7};
|
||||
mu = boost::math::tools::mean(u.begin(), u.end());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
// Does it work for a forward iterator?
|
||||
std::forward_list<Real> l{1,2,3,4,5,6,7};
|
||||
mu = boost::math::tools::mean(l.begin(), l.end());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
// Does it work with ublas vectors?
|
||||
boost::numeric::ublas::vector<Real> w(7);
|
||||
for (size_t i = 0; i < w.size(); ++i)
|
||||
{
|
||||
w[i] = i+1;
|
||||
}
|
||||
mu = boost::math::tools::mean(w.cbegin(), w.cend());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
}
|
||||
|
||||
template<class Complex>
|
||||
void test_complex_mean()
|
||||
{
|
||||
typedef typename Complex::value_type Real;
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Complex> v{{0,1},{0,2},{0,3},{0,4},{0,5}};
|
||||
auto mu = boost::math::tools::mean(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu.imag() - 3) < tol);
|
||||
BOOST_TEST(abs(mu.real()) < tol);
|
||||
|
||||
// Does range work?
|
||||
mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu.imag() - 3) < tol);
|
||||
BOOST_TEST(abs(mu.real()) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_mean_and_population_variance()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,1,1,1,1,1};
|
||||
auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu - 1) < tol);
|
||||
BOOST_TEST(abs(sigma_sq) < tol);
|
||||
|
||||
std::vector<Real> u{1};
|
||||
auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend());
|
||||
BOOST_TEST(abs(mu1 - 1) < tol);
|
||||
BOOST_TEST(abs(sigma1_sq) < tol);
|
||||
|
||||
std::array<Real, 8> w{0,1,0,1,0,1,0,1};
|
||||
auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end());
|
||||
BOOST_TEST(abs(mu2 - 1.0/2.0) < tol);
|
||||
BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol);
|
||||
|
||||
auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w);
|
||||
BOOST_TEST(abs(mu3 - 1.0/2.0) < tol);
|
||||
BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol);
|
||||
|
||||
}
|
||||
|
||||
template<class Z>
|
||||
void test_integer_mean_and_population_variance()
|
||||
{
|
||||
double tol = std::numeric_limits<double>::epsilon();
|
||||
std::vector<Z> v{1,1,1,1,1,1};
|
||||
auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v);
|
||||
BOOST_TEST(abs(mu - 1) < tol);
|
||||
BOOST_TEST(abs(sigma_sq) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_median()
|
||||
{
|
||||
std::mt19937 g(12);
|
||||
std::vector<Real> v{1,2,3,4,5,6,7};
|
||||
|
||||
Real m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 4);
|
||||
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
// Does range call work?
|
||||
m = boost::math::tools::median(v);
|
||||
BOOST_TEST_EQ(m, 4);
|
||||
|
||||
v = {1,2,3,3,4,5};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
|
||||
v = {1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {1,1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {2,4};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
|
||||
v = {1,1,1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {1,2,3};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
|
||||
// Does it work with std::array?
|
||||
std::array<Real, 3> w{1,2,3};
|
||||
m = boost::math::tools::median(w);
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_absolute_median()
|
||||
{
|
||||
@@ -261,34 +105,6 @@ void test_complex_absolute_median()
|
||||
}
|
||||
|
||||
|
||||
template<class Real>
|
||||
void test_gini_coefficient()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,0,0};
|
||||
Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini - 1) < tol);
|
||||
|
||||
gini = boost::math::tools::gini_coefficient(v);
|
||||
BOOST_TEST(abs(gini - 1) < tol);
|
||||
|
||||
v[0] = 1;
|
||||
v[1] = 1;
|
||||
v[2] = 1;
|
||||
gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
|
||||
v[0] = 0;
|
||||
v[1] = 0;
|
||||
v[2] = 0;
|
||||
gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
|
||||
std::array<Real, 3> w{0,0,0};
|
||||
gini = boost::math::tools::gini_coefficient(w);
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_hoyer_sparsity()
|
||||
{
|
||||
@@ -411,30 +227,6 @@ void test_shannon_entropy()
|
||||
|
||||
int main()
|
||||
{
|
||||
test_integer_mean<uint8_t>();
|
||||
test_integer_mean<int8_t>();
|
||||
test_integer_mean<int>();
|
||||
|
||||
test_mean<float>();
|
||||
test_mean<double>();
|
||||
test_mean<long double>();
|
||||
test_mean<cpp_bin_float_50>();
|
||||
|
||||
test_complex_mean<std::complex<float>>();
|
||||
test_complex_mean<cpp_complex_50>();
|
||||
|
||||
test_mean_and_population_variance<float>();
|
||||
test_mean_and_population_variance<double>();
|
||||
test_mean_and_population_variance<long double>();
|
||||
test_mean_and_population_variance<cpp_bin_float_50>();
|
||||
|
||||
test_integer_mean_and_population_variance<int>();
|
||||
|
||||
test_median<float>();
|
||||
test_median<double>();
|
||||
test_median<long double>();
|
||||
test_median<cpp_bin_float_50>();
|
||||
|
||||
test_absolute_median<float>();
|
||||
test_absolute_median<double>();
|
||||
test_absolute_median<long double>();
|
||||
@@ -445,11 +237,6 @@ int main()
|
||||
test_complex_absolute_median<std::complex<long double>>();
|
||||
test_complex_absolute_median<cpp_complex_50>();
|
||||
|
||||
test_gini_coefficient<float>();
|
||||
test_gini_coefficient<double>();
|
||||
test_gini_coefficient<long double>();
|
||||
test_gini_coefficient<cpp_bin_float_50>();
|
||||
|
||||
test_absolute_gini_coefficient<float>();
|
||||
test_absolute_gini_coefficient<double>();
|
||||
test_absolute_gini_coefficient<long double>();
|
||||
248
test/univariate_statistics_test.cpp
Normal file
248
test/univariate_statistics_test.cpp
Normal file
@@ -0,0 +1,248 @@
|
||||
/*
|
||||
* (C) Copyright Nick Thompson 2018.
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <forward_list>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
#include <boost/math/constants/constants.hpp>
|
||||
#include <boost/math/tools/univariate_statistics.hpp>
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/multiprecision/cpp_complex.hpp>
|
||||
|
||||
using boost::multiprecision::cpp_bin_float_50;
|
||||
using boost::multiprecision::cpp_complex_50;
|
||||
|
||||
/*
|
||||
* Test checklist:
|
||||
* 1) Does it work with multiprecision?
|
||||
* 2) Does it work with .cbegin()/.cend() if the data is not altered?
|
||||
* 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.)
|
||||
* 4) Does it work with std::forward_list if a forward iterator is all that is required?
|
||||
* 5) Does it work with complex data if complex data is sensible?
|
||||
*/
|
||||
|
||||
template<class Z>
|
||||
void test_integer_mean()
|
||||
{
|
||||
double tol = std::numeric_limits<double>::epsilon();
|
||||
std::vector<Z> v{1,2,3,4,5};
|
||||
double mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Work with std::array?
|
||||
std::array<Z, 5> w{1,2,3,4,5};
|
||||
mu = boost::math::tools::mean(w);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_mean()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,2,3,4,5};
|
||||
Real mu = boost::math::tools::mean(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Does range call work?
|
||||
mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Can we successfully average only part of the vector?
|
||||
mu = boost::math::tools::mean(v.begin(), v.begin() + 3);
|
||||
BOOST_TEST(abs(mu - 2) < tol);
|
||||
|
||||
// Does it work when we const qualify?
|
||||
mu = boost::math::tools::mean(v.cbegin(), v.cend());
|
||||
BOOST_TEST(abs(mu - 3) < tol);
|
||||
|
||||
// Does it work for std::array?
|
||||
std::array<Real, 7> u{1,2,3,4,5,6,7};
|
||||
mu = boost::math::tools::mean(u.begin(), u.end());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
// Does it work for a forward iterator?
|
||||
std::forward_list<Real> l{1,2,3,4,5,6,7};
|
||||
mu = boost::math::tools::mean(l.begin(), l.end());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
// Does it work with ublas vectors?
|
||||
boost::numeric::ublas::vector<Real> w(7);
|
||||
for (size_t i = 0; i < w.size(); ++i)
|
||||
{
|
||||
w[i] = i+1;
|
||||
}
|
||||
mu = boost::math::tools::mean(w.cbegin(), w.cend());
|
||||
BOOST_TEST(abs(mu - 4) < tol);
|
||||
|
||||
}
|
||||
|
||||
template<class Complex>
|
||||
void test_complex_mean()
|
||||
{
|
||||
typedef typename Complex::value_type Real;
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Complex> v{{0,1},{0,2},{0,3},{0,4},{0,5}};
|
||||
auto mu = boost::math::tools::mean(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu.imag() - 3) < tol);
|
||||
BOOST_TEST(abs(mu.real()) < tol);
|
||||
|
||||
// Does range work?
|
||||
mu = boost::math::tools::mean(v);
|
||||
BOOST_TEST(abs(mu.imag() - 3) < tol);
|
||||
BOOST_TEST(abs(mu.real()) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_mean_and_population_variance()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,1,1,1,1,1};
|
||||
auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v.begin(), v.end());
|
||||
BOOST_TEST(abs(mu - 1) < tol);
|
||||
BOOST_TEST(abs(sigma_sq) < tol);
|
||||
|
||||
std::vector<Real> u{1};
|
||||
auto [mu1, sigma1_sq] = boost::math::tools::mean_and_population_variance(u.cbegin(), u.cend());
|
||||
BOOST_TEST(abs(mu1 - 1) < tol);
|
||||
BOOST_TEST(abs(sigma1_sq) < tol);
|
||||
|
||||
std::array<Real, 8> w{0,1,0,1,0,1,0,1};
|
||||
auto [mu2, sigma2_sq] = boost::math::tools::mean_and_population_variance(w.begin(), w.end());
|
||||
BOOST_TEST(abs(mu2 - 1.0/2.0) < tol);
|
||||
BOOST_TEST(abs(sigma2_sq - 1.0/4.0) < tol);
|
||||
|
||||
auto [mu3, sigma3_sq] = boost::math::tools::mean_and_population_variance(w);
|
||||
BOOST_TEST(abs(mu3 - 1.0/2.0) < tol);
|
||||
BOOST_TEST(abs(sigma3_sq - 1.0/4.0) < tol);
|
||||
|
||||
}
|
||||
|
||||
template<class Z>
|
||||
void test_integer_mean_and_population_variance()
|
||||
{
|
||||
double tol = std::numeric_limits<double>::epsilon();
|
||||
std::vector<Z> v{1,1,1,1,1,1};
|
||||
auto [mu, sigma_sq] = boost::math::tools::mean_and_population_variance(v);
|
||||
BOOST_TEST(abs(mu - 1) < tol);
|
||||
BOOST_TEST(abs(sigma_sq) < tol);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_median()
|
||||
{
|
||||
std::mt19937 g(12);
|
||||
std::vector<Real> v{1,2,3,4,5,6,7};
|
||||
|
||||
Real m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 4);
|
||||
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
// Does range call work?
|
||||
m = boost::math::tools::median(v);
|
||||
BOOST_TEST_EQ(m, 4);
|
||||
|
||||
v = {1,2,3,3,4,5};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
|
||||
v = {1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {1,1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {2,4};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 3);
|
||||
|
||||
v = {1,1,1};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 1);
|
||||
|
||||
v = {1,2,3};
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
std::shuffle(v.begin(), v.end(), g);
|
||||
m = boost::math::tools::median(v.begin(), v.end());
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
|
||||
// Does it work with std::array?
|
||||
std::array<Real, 3> w{1,2,3};
|
||||
m = boost::math::tools::median(w);
|
||||
BOOST_TEST_EQ(m, 2);
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
void test_gini_coefficient()
|
||||
{
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
std::vector<Real> v{1,0,0};
|
||||
Real gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini - 1) < tol);
|
||||
|
||||
gini = boost::math::tools::gini_coefficient(v);
|
||||
BOOST_TEST(abs(gini - 1) < tol);
|
||||
|
||||
v[0] = 1;
|
||||
v[1] = 1;
|
||||
v[2] = 1;
|
||||
gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
|
||||
v[0] = 0;
|
||||
v[1] = 0;
|
||||
v[2] = 0;
|
||||
gini = boost::math::tools::gini_coefficient(v.begin(), v.end());
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
|
||||
std::array<Real, 3> w{0,0,0};
|
||||
gini = boost::math::tools::gini_coefficient(w);
|
||||
BOOST_TEST(abs(gini) < tol);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_integer_mean<uint8_t>();
|
||||
test_integer_mean<int8_t>();
|
||||
test_integer_mean<int>();
|
||||
|
||||
test_mean<float>();
|
||||
test_mean<double>();
|
||||
test_mean<long double>();
|
||||
test_mean<cpp_bin_float_50>();
|
||||
|
||||
test_complex_mean<std::complex<float>>();
|
||||
test_complex_mean<cpp_complex_50>();
|
||||
|
||||
test_mean_and_population_variance<float>();
|
||||
test_mean_and_population_variance<double>();
|
||||
test_mean_and_population_variance<long double>();
|
||||
test_mean_and_population_variance<cpp_bin_float_50>();
|
||||
|
||||
test_integer_mean_and_population_variance<int>();
|
||||
|
||||
test_median<float>();
|
||||
test_median<double>();
|
||||
test_median<long double>();
|
||||
test_median<cpp_bin_float_50>();
|
||||
|
||||
test_gini_coefficient<float>();
|
||||
test_gini_coefficient<double>();
|
||||
test_gini_coefficient<long double>();
|
||||
test_gini_coefficient<cpp_bin_float_50>();
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
Reference in New Issue
Block a user