2
0
mirror of https://github.com/boostorg/math.git synced 2026-01-19 04:22:09 +00:00

Commit working version before changing to readable code.

This commit is contained in:
NAThompson
2019-12-31 12:29:05 -05:00
parent f943649e68
commit ff528a8f1d
3 changed files with 135 additions and 10 deletions

View File

@@ -72,6 +72,12 @@ namespace boost{ namespace math{ namespace statistics {
template<class RandomAccessContainer>
auto median_absolute_deviation(RandomAccessContainer v, typename RandomAccessContainer::value_type center=std::numeric_limits<Real>::quiet_NaN());
template<class RandomAccessIterator>
auto interquartile_range(RandomAccessIterator first, RandomAccessIterator last);
template<class RandomAccessContainer>
auto interquartile_range(RandomAccessContainer v);
template<class Container>
auto gini_coefficient(Container & c);
@@ -210,6 +216,19 @@ use the following:
/Nota bene:/ The input vector is modified.
Again the vector is passed into a call to [@https://en.cppreference.com/w/cpp/algorithm/nth_element `nth_element`].
[heading Interquartile Range]
Computes the [@https://en.wikipedia.org/wiki/Interquartile_range interquartile range] of a dataset:
std::vector<double> v{1,2,3,4,5};
double iqr = boost::math::statistics::interquartile_range(v);
// Q1 = 1.5, Q3 = 4.5 => iqr = 3
For a vector of length /2n+1/ or /2n/, the first quartile /Q/[sub 1] is the median of the /n/ smallest values,
and the third quartile /Q/[sub 3] is the median of the /n/ largest values.
The interquartile range is then /Q/[sub 3] - /Q/[sub 1].
The function `interquartile_range`, like the `median`, calls into `std::nth_element`, and hence partially sorts the data.
[heading Gini Coefficient]
Compute the Gini coefficient of a dataset:

View File

@@ -469,13 +469,33 @@ template<class ForwardIterator>
auto interquartile_range(ForwardIterator first, ForwardIterator last)
{
    // Returns the interquartile range Q3 - Q1 of the samples in [first, last).
    //
    // With m samples and n = m/2, Q1 is the median of the n smallest values and
    // Q3 is the median of the n largest values; when m is odd the middle sample
    // belongs to neither half.
    //
    // Despite the template parameter's name, the iterators must be random access:
    // the body uses iterator arithmetic and std::nth_element.
    // Side effect: the range is partially reordered by std::nth_element.
    // Precondition (checked with BOOST_ASSERT_MSG): at least 3 samples.
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    auto m = std::distance(first, last);
    BOOST_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range.");

    if ( (m/2) & 1 ) {
        // Each half holds an odd number of samples, so each quartile is a
        // single order statistic.
        auto q1 = first + m/4;
        auto q3 = first + 3*m/4;
        std::nth_element(first, q1, last);
        Real Q1 = *q1;
        // Everything before q1 is already <= *q1, so selecting within
        // [q1, last) is enough to place the q3-th order statistic.
        std::nth_element(q1, q3, last);
        Real Q3 = *q3;
        return Q3 - Q1;
    } else {
        // Each half holds an even number of samples, so each quartile is the
        // mean of two adjacent order statistics.
        // In this branch m % 4 is 0 or 1; the remainder shifts the upper pair
        // by one position when m is odd, since the middle sample is excluded.
        auto j = m % 4;
        auto q1 = first + m/4 - 1;
        auto q3 = first + 3*m/4 - 1 + j;

        // Lower pair: positions q1 and q1 + 1.
        std::nth_element(first, q1, last);
        Real a = *q1;
        std::nth_element(q1, q1 + 1, last);
        Real b = *(q1 + 1);
        // Note: for integral value types this division truncates.
        Real Q1 = (a+b)/2;

        // Upper pair: positions q3 and q3 + 1.
        std::nth_element(q1, q3, last);
        a = *q3;
        std::nth_element(q3, q3 + 1, last);
        b = *(q3 + 1);
        Real Q3 = (a+b)/2;
        return Q3 - Q1;
    }
}
template<class RandomAccessContainer>

View File

@@ -726,15 +726,16 @@ void test_integer_gini_coefficient()
template<typename Real>
void test_interquartile_range()
{
std::mt19937 gen(486);
Real iqr;
// Taken from Wikipedia's example:
std::vector<Real> v{7, 7, 31, 31, 47, 75, 87, 115, 116, 119, 119, 155, 177};
// Q1 = 31, Q3 = 119, Q3 - Q1 = 88.
Real iqr = boost::math::statistics::interquartile_range(v);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 88);
std::mt19937 gen(486);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 88);
@@ -747,6 +748,90 @@ void test_interquartile_range()
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 0);
v = {1,2,3};
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 2);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 2);
v = {0, 3, 5};
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
v = {1,2,3,4};
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 2);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 2);
v = {1,2,3,4,5};
// Q1 = 1.5, Q3 = 4.5
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 3);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 3);
v = {1,2,3,4,5,6};
// Q1 = 2, Q3 = 5
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 3);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 3);
v = {1,2,3, 4, 5,6,7};
// Q1 = 2, Q3 = 6
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 4);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 4);
v = {1,2,3,4,5,6,7,8};
// Q1 = 2.5, Q3 = 6.5
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 4);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 4);
v = {1,2,3,4,5,6,7,8,9};
// Q1 = 2.5, Q3 = 7.5
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
v = {1,2,3,4,5,6,7,8,9,10};
// Q1 = 3, Q3 = 8
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 5);
v = {1,2,3,4,5,6,7,8,9,10,11};
// Q1 = 3, Q3 = 9
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 6);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 6);
v = {1,2,3,4,5,6,7,8,9,10,11,12};
// Q1 = 3.5, Q3 = 9.5
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 6);
std::shuffle(v.begin(), v.end(), gen);
iqr = boost::math::statistics::interquartile_range(v);
BOOST_TEST_EQ(iqr, 6);
}
@@ -818,5 +903,6 @@ int main()
test_sample_gini_coefficient<cpp_bin_float_50>();
test_interquartile_range<double>();
test_interquartile_range<cpp_bin_float_50>();
return boost::report_errors();
}