mirror of
https://github.com/boostorg/math.git
synced 2026-01-19 04:22:09 +00:00
Computation of covariance. [CI SKIP]
This commit is contained in:
@@ -554,6 +554,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22.
|
||||
|
||||
[mathpart vector_functionals Vector Functionals]
|
||||
[include vector_functionals/univariate_statistics.qbk]
|
||||
[include vector_functionals/bivariate_statistics.qbk]
|
||||
[include vector_functionals/signal_statistics.qbk]
|
||||
[include vector_functionals/norms.qbk]
|
||||
[endmathpart] [/section:vector_functionals Vector Functionals]
|
||||
|
||||
54
doc/vector_functionals/bivariate_statistics.qbk
Normal file
54
doc/vector_functionals/bivariate_statistics.qbk
Normal file
@@ -0,0 +1,54 @@
|
||||
[/
|
||||
Copyright 2018 Nick Thompson
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0.
|
||||
(See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt).
|
||||
]
|
||||
|
||||
[section:bivariate_statistics Bivariate Statistics]
|
||||
|
||||
[heading Synopsis]
|
||||
|
||||
``
|
||||
#include <boost/math/tools/bivariate_statistics.hpp>
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
template<class Container>
|
||||
auto population_covariance(Container const & u, Container const & v);
|
||||
|
||||
template<class Container>
|
||||
auto means_and_population_covariance(Container const & u, Container const & v);
|
||||
|
||||
}}}
|
||||
``
|
||||
|
||||
[heading Description]
|
||||
|
||||
This file provides functions for computing bivariate statistics.
|
||||
|
||||
[heading Population Covariance]
|
||||
|
||||
std::vector<double> u{1,2,3,4,5};
|
||||
std::vector<double> v{1,2,3,4,5};
|
||||
double cov_uv = boost::math::tools::population_covariance(u, v);
|
||||
|
||||
The implementation follows [@https://doi.org/10.1109/CLUSTR.2009.5289161 Bennet et al].
|
||||
The data is not modified and must be forward iterable.
|
||||
Works with real-valued inputs and does not work with complex-valued inputs.
|
||||
|
||||
The algorithm used herein simultaneously generates the mean values of the input data /u/ and /v/.
|
||||
For certain applications, it might be useful to get them in a single pass.
|
||||
As such, we provide `means_and_population_covariance`:
|
||||
|
||||
std::vector<double> u{1,2,3,4,5};
|
||||
std::vector<double> v{1,2,3,4,5};
|
||||
auto [mu_u, mu_v, cov_uv] = boost::math::tools::means_and_population_covariance(u, v);
|
||||
|
||||
[heading References]
|
||||
|
||||
* Bennett, Janine, et al. ['Numerically stable, single-pass, parallel statistics algorithms.] Cluster Computing and Workshops, 2009. CLUSTER'09. IEEE International Conference on. IEEE, 2009.
|
||||
|
||||
[endsect]
|
||||
[/section:bivariate_statistics Bivariate Statistics]
|
||||
53
include/boost/math/tools/bivariate_statistics.hpp
Normal file
53
include/boost/math/tools/bivariate_statistics.hpp
Normal file
@@ -0,0 +1,53 @@
|
||||
// (C) Copyright Nick Thompson 2018.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
|
||||
#define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
|
||||
|
||||
#include <iterator>
|
||||
#include <tuple>
|
||||
#include <boost/type_traits.hpp>
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/multiprecision/detail/number_base.hpp>
|
||||
|
||||
|
||||
namespace boost{ namespace math{ namespace tools {
|
||||
|
||||
template<class Container>
|
||||
auto
|
||||
means_and_population_covariance(Container const & u, Container const & v)
|
||||
{
|
||||
using Real = typename Container::value_type;
|
||||
using std::size;
|
||||
BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
|
||||
BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
|
||||
|
||||
// See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
|
||||
Real cov = 0;
|
||||
Real mu_u = u[0];
|
||||
Real mu_v = v[0];
|
||||
|
||||
for(size_t i = 1; i < size(u); ++i)
|
||||
{
|
||||
Real u_tmp = (u[i] - mu_u)/(i+1);
|
||||
Real v_tmp = v[i] - mu_v;
|
||||
cov += i*u_tmp*v_tmp;
|
||||
mu_u = mu_u + u_tmp;
|
||||
mu_v = mu_v + v_tmp/(i+1);
|
||||
}
|
||||
|
||||
return std::make_tuple(mu_u, mu_v, cov/size(u));
|
||||
}
|
||||
|
||||
template<class Container>
|
||||
auto
|
||||
population_covariance(Container const & u, Container const & v)
|
||||
{
|
||||
auto [mu_u, mu_v, cov] = boost::math::tools::means_and_population_covariance(u, v);
|
||||
return cov;
|
||||
}
|
||||
|
||||
}}}
|
||||
#endif
|
||||
@@ -905,6 +905,7 @@ test-suite misc :
|
||||
[ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run bivariate_statistics_test.cpp : : : [ requires cxx17_if_constexpr ] ]
|
||||
[ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ]
|
||||
[ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ]
|
||||
[ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ]
|
||||
|
||||
121
test/bivariate_statistics_test.cpp
Normal file
121
test/bivariate_statistics_test.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* (C) Copyright Nick Thompson 2018.
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <forward_list>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
#include <boost/math/constants/constants.hpp>
|
||||
#include <boost/math/tools/univariate_statistics.hpp>
|
||||
#include <boost/math/tools/bivariate_statistics.hpp>
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/multiprecision/cpp_complex.hpp>
|
||||
|
||||
using boost::multiprecision::cpp_bin_float_50;
|
||||
using boost::multiprecision::cpp_complex_50;
|
||||
|
||||
/*
|
||||
* Test checklist:
|
||||
* 1) Does it work with multiprecision?
|
||||
* 2) Does it work with .cbegin()/.cend() if the data is not altered?
|
||||
* 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.)
|
||||
* 4) Does it work with std::forward_list if a forward iterator is all that is required?
|
||||
* 5) Does it work with complex data if complex data is sensible?
|
||||
*/
|
||||
|
||||
using boost::math::tools::means_and_population_covariance;
|
||||
using boost::math::tools::population_covariance;
|
||||
|
||||
template<class Real>
|
||||
void test_covariance()
|
||||
{
|
||||
std::cout << std::setprecision(std::numeric_limits<Real>::digits10+1);
|
||||
Real tol = std::numeric_limits<Real>::epsilon();
|
||||
using std::abs;
|
||||
|
||||
// Covariance of a single thing is zero:
|
||||
std::array<Real, 1> u1{8};
|
||||
std::array<Real, 1> v1{17};
|
||||
auto [mu_u1, mu_v1, cov1] = means_and_population_covariance(u1, v1);
|
||||
|
||||
BOOST_TEST(abs(cov1) < tol);
|
||||
BOOST_TEST(abs(mu_u1 - 8) < tol);
|
||||
BOOST_TEST(abs(mu_v1 - 17) < tol);
|
||||
|
||||
|
||||
std::array<Real, 2> u2{8, 4};
|
||||
std::array<Real, 2> v2{3, 7};
|
||||
auto [mu_u2, mu_v2, cov2] = means_and_population_covariance(u2, v2);
|
||||
|
||||
BOOST_TEST(abs(cov2+4) < tol);
|
||||
BOOST_TEST(abs(mu_u2 - 6) < tol);
|
||||
BOOST_TEST(abs(mu_v2 - 5) < tol);
|
||||
|
||||
std::vector<Real> u3{1,2,3};
|
||||
std::vector<Real> v3{1,1,1};
|
||||
|
||||
auto [mu_u3, mu_v3, cov3] = means_and_population_covariance(u3,v3);
|
||||
|
||||
// Since v is constant, covariance(u,v) = 0 against everything any u:
|
||||
BOOST_TEST(abs(cov3) < tol);
|
||||
BOOST_TEST(abs(mu_u3 - 2) < tol);
|
||||
BOOST_TEST(abs(mu_v3 - 1) < tol);
|
||||
// Make sure we pull the correct symbol out of means_and_populaton_covariance:
|
||||
cov3 = population_covariance(u3, v3);
|
||||
BOOST_TEST(abs(cov3) < tol);
|
||||
|
||||
cov3 = population_covariance(v3, u3);
|
||||
// Covariance is symmetric: cov(u,v) = cov(v,u)
|
||||
BOOST_TEST(abs(cov3) < tol);
|
||||
|
||||
// cov(u,u) = sigma(u)^2:
|
||||
cov3 = population_covariance(u3, u3);
|
||||
Real expected = Real(2)/Real(3);
|
||||
|
||||
BOOST_TEST(abs(cov3 - expected) < tol);
|
||||
|
||||
std::mt19937 gen(15);
|
||||
// Can't template standard library on multiprecision, so use double and cast back:
|
||||
std::uniform_real_distribution<double> dis(-1.0, 1.0);
|
||||
std::vector<Real> u(500);
|
||||
std::vector<Real> v(500);
|
||||
for(size_t i = 0; i < u.size(); ++i) {
|
||||
u[i] = (Real) dis(gen);
|
||||
v[i] = (Real) dis(gen);
|
||||
}
|
||||
|
||||
auto [mu_u, sigma_u_sq] = boost::math::tools::mean_and_population_variance(u);
|
||||
auto [mu_v, sigma_v_sq] = boost::math::tools::mean_and_population_variance(v);
|
||||
|
||||
auto [mu_u_, mu_v_, cov_uv] = means_and_population_covariance(u, v);
|
||||
BOOST_TEST(abs(mu_u - mu_u_) < tol);
|
||||
BOOST_TEST(abs(mu_v - mu_v_) < tol);
|
||||
|
||||
// Cauchy-Schwartz inequality:
|
||||
BOOST_TEST(cov_uv*cov_uv <= sigma_u_sq*sigma_v_sq);
|
||||
// cov(X, X) = sigma(X)^2:
|
||||
Real cov_uu = population_covariance(u, u);
|
||||
BOOST_TEST(abs(cov_uu - sigma_u_sq) < tol);
|
||||
Real cov_vv = population_covariance(v, v);
|
||||
BOOST_TEST(abs(cov_vv - sigma_v_sq) < tol);
|
||||
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_covariance<float>();
|
||||
test_covariance<double>();
|
||||
test_covariance<long double>();
|
||||
test_covariance<cpp_bin_float_50>();
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
Reference in New Issue
Block a user