mirror of
https://github.com/boostorg/math.git
synced 2026-01-19 04:22:09 +00:00
Empirical Cumulative Distribution function
This commit is contained in:
@@ -37,6 +37,7 @@
|
|||||||
[include triangular.qbk]
|
[include triangular.qbk]
|
||||||
[include uniform.qbk]
|
[include uniform.qbk]
|
||||||
[include weibull.qbk]
|
[include weibull.qbk]
|
||||||
|
[include empirical_cdf.qbk]
|
||||||
|
|
||||||
[endsect] [/section:dists Distributions]
|
[endsect] [/section:dists Distributions]
|
||||||
|
|
||||||
@@ -138,10 +139,3 @@ opportunity to integrate the statistical tests with this framework at some later
|
|||||||
(See accompanying file LICENSE_1_0.txt or copy at
|
(See accompanying file LICENSE_1_0.txt or copy at
|
||||||
http://www.boost.org/LICENSE_1_0.txt).
|
http://www.boost.org/LICENSE_1_0.txt).
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
71
doc/distributions/empirical_cdf.qbk
Normal file
71
doc/distributions/empirical_cdf.qbk
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
[/
|
||||||
|
Copyright (c) 2019 Nick Thompson
|
||||||
|
Use, modification and distribution are subject to the
|
||||||
|
Boost Software License, Version 1.0. (See accompanying file
|
||||||
|
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
]
|
||||||
|
|
||||||
|
[section:empirical_cdf Empirical Cumulative Distribution Function]
|
||||||
|
|
||||||
|
[heading Synopsis]
|
||||||
|
|
||||||
|
```
|
||||||
|
#include <boost/math/distributions/empirical_cumulative_distribution_function.hpp>
|
||||||
|
|
||||||
|
namespace boost{ namespace math{
|
||||||
|
|
||||||
|
template <class RandomAccessContainer>
|
||||||
|
class empirical_cumulative_distribution_function
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using Real = typename RandomAccessContainer::value_type;
|
||||||
|
empirical_cumulative_distribution_function(RandomAccessContainer && v);
|
||||||
|
|
||||||
|
auto operator()(Real t) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
||||||
|
```
|
||||||
|
|
||||||
|
[heading Empirical Cumulative Distribution Function]
|
||||||
|
|
||||||
|
The empirical cumulative distribution function is a step function constructed from observed data which converges to the true cumulative distribution function in the limit of infinite data.
|
||||||
|
This function is a basic building block of hypothesis testing workflows that attempt to answer the question "does my data come from a given distribution?"
|
||||||
|
These tests require computing quadratures over some function of the empirical CDF and the supposed CDF to create a distance measurement, and hence it is occasionally useful to construct from the data a callable that can be evaluated at any real argument.
|
||||||
|
|
||||||
|
An example usage is demonstrated below:
|
||||||
|
|
||||||
|
```
|
||||||
|
#include <vector>
|
||||||
|
#include <random>
|
||||||
|
#include <boost/math/distributions/empirical_cumulative_distribution_function.hpp>
|
||||||
|
using boost::math::empirical_cumulative_distribution_function;
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen{rd()};
|
||||||
|
std::normal_distribution<double> dis(0, 1);
|
||||||
|
size_t n = 128;
|
||||||
|
std::vector<double> v(n);
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
v[i] = dis(gen);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ecdf = empirical_cumulative_distribution_function(std::move(v));
|
||||||
|
std::cout << "ecdf(0.0) = " << ecdf(0.0) << "\n";
|
||||||
|
// should print approximately 0.5 . . .
|
||||||
|
```
|
||||||
|
|
||||||
|
The empirical distribution function operates on sorted data.
|
||||||
|
If the data are not already sorted, the constructor sorts them for you at O(N log(N)) cost.
|
||||||
|
|
||||||
|
Call operator complexity is O(log(N)).
|
||||||
|
|
||||||
|
Works with both integer and floating point types.
|
||||||
|
If the input data consists of integers, the output of the call operator is a double. Requires C++17.
|
||||||
|
|
||||||
|
[$../graphs/empiricial_cumulative_distribution_gauss.svg]
|
||||||
|
|
||||||
|
[$../graphs/empiricial_cumulative_distribution_uniform.svg]
|
||||||
|
|
||||||
|
|
||||||
|
[endsect]
|
||||||
|
[/section:empirical_cdf]
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
<svg xmlns='http://www.w3.org/2000/svg' width='1100' height='679'>
|
<svg xmlns='http://www.w3.org/2000/svg' width='1100' height='679'>
|
||||||
<style>svg { background-color: black; }
|
<style>svg { background-color: black; }
|
||||||
</style>
|
</style>
|
||||||
<text x='550' y='20' font-family='Palatino' font-size='25' fill='white' alignment-baseline='middle' text-anchor='middle'>Empirical (blue) and continuous CDF (orange) of an 𝓝(0,1) Gaussian distribution for n = 128 samples</text>
|
<text x='550' y='20' font-family='Palatino' font-size='25' fill='white' alignment-baseline='middle' text-anchor='middle'>Empirical (blue) and continuous CDF (orange) of an 𝓝(0,1) distribution on 128 samples</text>
|
||||||
<g transform='translate(25, 40)'>
|
<g transform='translate(25, 40)'>
|
||||||
<line x1='0' y1='0' x2='0' y2='619' stroke='gray' stroke-width='1' />
|
<line x1='0' y1='0' x2='0' y2='619' stroke='gray' stroke-width='1' />
|
||||||
<line x1='0' y1='619' x2='1055' y2='619' stroke='gray' stroke-width='1' />
|
<line x1='0' y1='619' x2='1055' y2='619' stroke='gray' stroke-width='1' />
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
@@ -2,7 +2,7 @@
|
|||||||
<svg xmlns='http://www.w3.org/2000/svg' width='1100' height='679'>
|
<svg xmlns='http://www.w3.org/2000/svg' width='1100' height='679'>
|
||||||
<style>svg { background-color: black; }
|
<style>svg { background-color: black; }
|
||||||
</style>
|
</style>
|
||||||
<text x='550' y='20' font-family='Palatino' font-size='25' fill='white' alignment-baseline='middle' text-anchor='middle'>Empirical (blue) and theoretical CDF (orange) of an dice roll distribution on n = 128 samples</text>
|
<text x='550' y='20' font-family='Palatino' font-size='25' fill='white' alignment-baseline='middle' text-anchor='middle'>Empirical (blue) and theoretical CDF (orange) of the dice roll distribution on n = 128 samples</text>
|
||||||
<g transform='translate(25, 40)'>
|
<g transform='translate(25, 40)'>
|
||||||
<line x1='0' y1='0' x2='0' y2='619' stroke='gray' stroke-width='1' />
|
<line x1='0' y1='0' x2='0' y2='619' stroke='gray' stroke-width='1' />
|
||||||
<line x1='0' y1='619' x2='1055' y2='619' stroke='gray' stroke-width='1' />
|
<line x1='0' y1='619' x2='1055' y2='619' stroke='gray' stroke-width='1' />
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
@@ -6,6 +6,7 @@
|
|||||||
#ifndef BOOST_MATH_DISTRIBUTIONS_EMPIRICAL_CUMULATIVE_DISTRIBUTION_FUNCTION_HPP
|
#ifndef BOOST_MATH_DISTRIBUTIONS_EMPIRICAL_CUMULATIVE_DISTRIBUTION_FUNCTION_HPP
|
||||||
#define BOOST_MATH_DISTRIBUTIONS_EMPIRICAL_CUMULATIVE_DISTRIBUTION_FUNCTION_HPP
|
#define BOOST_MATH_DISTRIBUTIONS_EMPIRICAL_CUMULATIVE_DISTRIBUTION_FUNCTION_HPP
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
namespace boost { namespace math{
|
namespace boost { namespace math{
|
||||||
|
|
||||||
@@ -22,7 +23,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto operator()(Real x) const {
|
auto operator()(Real x) const {
|
||||||
if constexpr (std::is_integral_v<Real>) {
|
if constexpr (std::is_integral_v<Real>)
|
||||||
|
{
|
||||||
if (x < m_v[0]) {
|
if (x < m_v[0]) {
|
||||||
return double(0);
|
return double(0);
|
||||||
}
|
}
|
||||||
@@ -32,15 +34,16 @@ public:
|
|||||||
auto it = std::upper_bound(m_v.begin(), m_v.end(), x);
|
auto it = std::upper_bound(m_v.begin(), m_v.end(), x);
|
||||||
return static_cast<double>(std::distance(m_v.begin(), it))/static_cast<double>(m_v.size());
|
return static_cast<double>(std::distance(m_v.begin(), it))/static_cast<double>(m_v.size());
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
if (x < m_v[0]) {
|
{
|
||||||
return Real(0);
|
if (x < m_v[0]) {
|
||||||
}
|
return Real(0);
|
||||||
if (x >= m_v[m_v.size()-1]) {
|
}
|
||||||
return Real(1);
|
if (x >= m_v[m_v.size()-1]) {
|
||||||
}
|
return Real(1);
|
||||||
auto it = std::upper_bound(m_v.begin(), m_v.end(), x);
|
}
|
||||||
return static_cast<Real>(std::distance(m_v.begin(), it))/static_cast<Real>(m_v.size());
|
auto it = std::upper_bound(m_v.begin(), m_v.end(), x);
|
||||||
|
return static_cast<Real>(std::distance(m_v.begin(), it))/static_cast<Real>(m_v.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -951,6 +951,7 @@ test-suite misc :
|
|||||||
[ run compile_test/catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
|
[ run compile_test/catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
|
||||||
[ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
[ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
[ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
[ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
|
[ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
[ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
[ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
[ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
[ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
[ run bivariate_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
[ run bivariate_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
|
||||||
|
|||||||
@@ -16,18 +16,54 @@
|
|||||||
using boost::multiprecision::float128;
|
using boost::multiprecision::float128;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using boost::math::distributions::empirical_cumulative_distribution_function;
|
using boost::math::empirical_cumulative_distribution_function;
|
||||||
|
|
||||||
|
template<class Z>
|
||||||
|
void test_uniform_z()
|
||||||
|
{
|
||||||
|
std::vector<Z> v{6,3,4,1,2,5};
|
||||||
|
|
||||||
|
auto ecdf = empirical_cumulative_distribution_function(std::move(v));
|
||||||
|
|
||||||
|
CHECK_ULP_CLOSE(1.0/6.0, ecdf(1), 1);
|
||||||
|
CHECK_ULP_CLOSE(2.0/6.0, ecdf(2), 1);
|
||||||
|
CHECK_ULP_CLOSE(3.0/6.0, ecdf(3), 1);
|
||||||
|
CHECK_ULP_CLOSE(4.0/6.0, ecdf(4), 1);
|
||||||
|
CHECK_ULP_CLOSE(5.0/6.0, ecdf(5), 1);
|
||||||
|
CHECK_ULP_CLOSE(6.0/6.0, ecdf(6), 1);
|
||||||
|
|
||||||
|
// Less trivial:
|
||||||
|
|
||||||
|
v = {6,3,4,1,1,1,2,4};
|
||||||
|
ecdf = empirical_cumulative_distribution_function(std::move(v));
|
||||||
|
CHECK_ULP_CLOSE(3.0/8.0, ecdf(1), 1);
|
||||||
|
CHECK_ULP_CLOSE(4.0/8.0, ecdf(2), 1);
|
||||||
|
CHECK_ULP_CLOSE(5.0/8.0, ecdf(3), 1);
|
||||||
|
CHECK_ULP_CLOSE(7.0/8.0, ecdf(4), 1);
|
||||||
|
CHECK_ULP_CLOSE(7.0/8.0, ecdf(5), 1);
|
||||||
|
CHECK_ULP_CLOSE(8.0/8.0, ecdf(6), 1);
|
||||||
|
}
|
||||||
|
|
||||||
template<class Real>
|
template<class Real>
|
||||||
void test_uniform()
|
void test_uniform()
|
||||||
{
|
{
|
||||||
|
size_t n = 128;
|
||||||
|
std::vector<Real> v(n);
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
v[i] = Real(i+1)/Real(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ecdf = empirical_cumulative_distribution_function(std::move(v));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
CHECK_ULP_CLOSE(Real(i+1)/Real(n), ecdf(Real(i+1)/Real(n)), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
test_uniform_z<int>();
|
||||||
test_uniform<float>();
|
test_uniform<double>();
|
||||||
return boost::math::test::report_errors();
|
return boost::math::test::report_errors();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user