mirror of
https://github.com/boostorg/math.git
synced 2026-01-19 04:22:09 +00:00
Implemented mode and associated tests (#390)
* Implemented mode and associated tests * Clarity and complexity changes * Added google benchmark * Small changes to mode. More tests. * Seperated into sorted and non-sorted functions * Fixed data types and removed copying * Fixed bounds checking * Additional tests and cleanup. * Added tests for std::list and std::forward_list * Small testing changes and documentation * Added modes memory allocation and faster insertion * Changed return type, and modified tests * Removed copied iterator and edge cases. * Documentation fixes
This commit is contained in:
@@ -90,6 +90,17 @@ namespace boost{ namespace math{ namespace statistics {
|
||||
template<class ForwardIterator>
|
||||
auto sample_gini_coefficient(ForwardIterator first, ForwardIterator last);
|
||||
|
||||
template<class ForwardIterator, class OutputIterator>
|
||||
auto sorted_mode(ForwardIterator first, ForwardIterator last, OutputIterator output) -> decltype(output);
|
||||
|
||||
template<class Container, class OutputIterator>
|
||||
inline auto sorted_mode(Container & v, OutputIterator output) -> decltype(output);
|
||||
|
||||
template<class RandomAccessIterator, class OutputIterator>
|
||||
auto mode(RandomAccessIterator first, RandomAccessIterator last, OutputIterator output) -> decltype(output);
|
||||
|
||||
template<class RandomAccessContainer, class OutputIterator>
|
||||
inline auto mode(RandomAccessContainer & v, OutputIterator output) -> decltype(output);
|
||||
}}}
|
||||
``
|
||||
|
||||
@@ -252,6 +263,38 @@ You should have /very/ good cause to pass negative values to the Gini coefficien
|
||||
Another use case is found in signal processing, but the sorting is by magnitude and hence has a different implementation.
|
||||
See `absolute_gini_coefficient` for details.
|
||||
|
||||
[heading Mode]
|
||||
|
||||
Compute the mode(s) of a data set:
|
||||
|
||||
std::vector<int> v {1, 3, 2, 2, 5, 4};
|
||||
std::vector<int> modes;
|
||||
boost::math::statistics::mode(v, std::back_inserter(modes));
|
||||
// Mode is 2, modes.size() == 1
|
||||
std::deque<int> d_modes;
|
||||
std::array<int, 7> w {2, 2, 3, 1, 5, 4, 4};
|
||||
boost::math::statistics::mode(w, std::back_inserter(d_modes));
|
||||
// Modes are 2 and 4, d_modes.size() == 2
|
||||
|
||||
/Nota bene/: The input data is altered: in particular, it is sorted. The function calls `std::sort`, and as such requires random access iterators.
|
||||
|
||||
If your data is sorted, the following function can be used instead:
|
||||
|
||||
std::vector<int> v {1, 2, 2, 3, 4, 5};
|
||||
std::vector<int> modes;
|
||||
boost::math::statistics::sorted_mode(v, std::back_inserter(modes));
|
||||
// Mode is 2, modes.size() == 1
|
||||
std::deque<int> d_modes;
|
||||
std::array<int, 7> w {1, 2, 2, 3, 4, 4, 5};
|
||||
boost::math::statistics::sorted_mode(w, std::back_inserter(d_modes));
|
||||
// Modes are 2 and 4, d_modes.size() == 2
|
||||
|
||||
/Nota bene/: The requirements for sorted_mode are reduced to forward iterators because there is no call to `std::sort`.
|
||||
|
||||
/Nota bene/: Passing unsorted data to sorted_mode is a bug.
|
||||
|
||||
For both `mode` and `sorted_mode`, the dataset must be of an integer type.
|
||||
|
||||
[heading References]
|
||||
|
||||
* Higham, Nicholas J. ['Accuracy and stability of numerical algorithms.] Vol. 80. Siam, 2002.
|
||||
|
||||
@@ -512,6 +512,63 @@ inline auto interquartile_range(RandomAccessContainer & v)
|
||||
return interquartile_range(v.begin(), v.end());
|
||||
}
|
||||
|
||||
// Writes every mode (most frequently occurring value) of the range [first, last) to `output`.
//
// The range must already be sorted so that equal values are adjacent: each run of
// equal elements is counted and the value(s) with the longest run are reported.
// When several values tie, they are written in the order their runs appear in the range.
// An empty range writes nothing.
//
// first, last: forward iterators over integral values (enforced by the static_assert).
// output:      output iterator that receives the mode(s).
// Returns the output iterator positioned one past the last mode written.
template<class ForwardIterator, class OutputIterator>
auto sorted_mode(ForwardIterator first, ForwardIterator last, OutputIterator output) -> decltype(output)
{
    using Z = typename std::iterator_traits<ForwardIterator>::value_type;
    static_assert(std::is_integral<Z>::value, "Floating point values have not yet been implemented.");
    using Size = typename std::iterator_traits<ForwardIterator>::difference_type;

    // Candidate modes are collected locally and only copied to `output` at the end,
    // because a longer run found later invalidates everything gathered so far.
    std::vector<Z> modes {};
    modes.reserve(16);
    Size max_counter {0};

    while(first != last)
    {
        // Measure the run of elements equal to *first.
        Size current_count {0};
        // The iterator type is spelled out on purpose: `auto end_it {first}` is deduced as
        // std::initializer_list<ForwardIterator> by compilers that predate N3922.
        ForwardIterator end_it {first};
        while(end_it != last && *end_it == *first)
        {
            ++current_count;
            ++end_it;
        }

        if(current_count > max_counter)
        {
            // Strictly longer run: this value replaces all previous candidates.
            modes.resize(1);
            modes[0] = *first;
            max_counter = current_count;
        }
        else if(current_count == max_counter)
        {
            // Tie with the current best: the data set is (so far) multi-modal.
            modes.emplace_back(*first);
        }

        // Continue with the first element of the next run.
        first = end_it;
    }

    return std::move(modes.begin(), modes.end(), output);
}
|
||||
|
||||
// Container convenience overload: reports the mode(s) of an already-sorted container
// by handing its complete range to the iterator overload of sorted_mode.
// Returns whatever the iterator overload returns for that range.
template<class Container, class OutputIterator>
inline auto sorted_mode(Container & v, OutputIterator output) -> decltype(output)
{
    auto range_begin = v.begin();
    auto range_end = v.end();
    return sorted_mode(range_begin, range_end, output);
}
|
||||
|
||||
// Reports the mode(s) of an unsorted range [first, last).
//
// The range is sorted in place with std::sort (so the caller's data is reordered),
// after which the work is delegated to sorted_mode.
// Returns the iterator produced by sorted_mode.
template<class RandomAccessIterator, class OutputIterator>
auto mode(RandomAccessIterator first, RandomAccessIterator last, OutputIterator output) -> decltype(output)
{
    // sorted_mode counts runs of adjacent equal values, hence the sort.
    std::sort(first, last);

    OutputIterator past_last_mode = sorted_mode(first, last, output);
    return past_last_mode;
}
|
||||
|
||||
// Container convenience overload: reports the mode(s) of a whole container by
// forwarding its complete range to the iterator overload of mode.
// Returns whatever the iterator overload returns for that range.
template<class RandomAccessContainer, class OutputIterator>
inline auto mode(RandomAccessContainer & v, OutputIterator output) -> decltype(output)
{
    auto range_begin = v.begin();
    auto range_end = v.end();
    return mode(range_begin, range_end, output);
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
130
reporting/performance/test_mode.cpp
Normal file
130
reporting/performance/test_mode.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
// (C) Copyright Nick Thompson and Matt Borland 2020.
|
||||
// Use, modification and distribution are subject to the
|
||||
// Boost Software License, Version 1.0. (See accompanying file
|
||||
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <random>
|
||||
#include <boost/math/statistics/univariate_statistics.hpp>
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
template <class Z>
|
||||
void test_mode(benchmark::State& state)
|
||||
{
|
||||
using boost::math::statistics::sorted_mode;
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937_64 mt(rd());
|
||||
std::uniform_int_distribution<> dist {1, 10};
|
||||
|
||||
auto gen = [&dist, &mt](){return dist(mt);};
|
||||
|
||||
std::vector<Z> v(state.range(0));
|
||||
std::generate(v.begin(), v.end(), gen);
|
||||
|
||||
for (auto _ : state)
|
||||
{
|
||||
std::vector<Z> modes;
|
||||
benchmark::DoNotOptimize(sorted_mode(v.begin(), v.end(), std::back_inserter(modes)));
|
||||
}
|
||||
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
template <class Z>
|
||||
void sequential_test_mode(benchmark::State& state)
|
||||
{
|
||||
using boost::math::statistics::sorted_mode;
|
||||
|
||||
std::vector<Z> v(state.range(0));
|
||||
|
||||
size_t current_num {1};
|
||||
// produces {1, 2, 3, 4, 5...}
|
||||
for(size_t i {}; i < v.size(); ++i)
|
||||
{
|
||||
v[i] = current_num;
|
||||
++current_num;
|
||||
}
|
||||
|
||||
for (auto _ : state)
|
||||
{
|
||||
std::vector<Z> modes;
|
||||
benchmark::DoNotOptimize(sorted_mode(v, std::back_inserter(modes)));
|
||||
}
|
||||
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
template <class Z>
|
||||
void sequential_pairs_test_mode(benchmark::State& state)
|
||||
{
|
||||
using boost::math::statistics::sorted_mode;
|
||||
|
||||
std::vector<Z> v(state.range(0));
|
||||
|
||||
size_t current_num {1};
|
||||
size_t current_num_counter {};
|
||||
// produces {1, 1, 2, 2, 3, 3, ...}
|
||||
for(size_t i {}; i < v.size(); ++i)
|
||||
{
|
||||
v[i] = current_num;
|
||||
++current_num_counter;
|
||||
if(current_num_counter > 2)
|
||||
{
|
||||
++current_num;
|
||||
current_num_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto _ : state)
|
||||
{
|
||||
std::vector<Z> modes;
|
||||
benchmark::DoNotOptimize(sorted_mode(v, std::back_inserter(modes)));
|
||||
}
|
||||
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
template <class Z>
|
||||
void sequential_multiple_test_mode(benchmark::State& state)
|
||||
{
|
||||
using boost::math::statistics::sorted_mode;
|
||||
|
||||
std::vector<Z> v(state.range(0));
|
||||
|
||||
size_t current_num {1};
|
||||
size_t current_num_counter {};
|
||||
// produces {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, ...}
|
||||
for(size_t i {}; i < v.size(); ++i)
|
||||
{
|
||||
v[i] = current_num;
|
||||
++current_num_counter;
|
||||
if(current_num_counter > current_num)
|
||||
{
|
||||
++current_num;
|
||||
current_num_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto _ : state)
|
||||
{
|
||||
std::vector<Z> modes;
|
||||
benchmark::DoNotOptimize(sorted_mode(v, std::back_inserter(modes)));
|
||||
}
|
||||
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
// Registers each of the four benchmarks above for int32_t, int64_t and uint32_t.
// Every registration uses range arguments from 1<<1 up to 1<<22 with a range
// multiplier of 2, and requests Complexity() -- presumably the counterpart of the
// state.SetComplexityN(...) call made in each benchmark; confirm against the
// Google Benchmark documentation.
BENCHMARK_TEMPLATE(test_mode, int32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(test_mode, int64_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(test_mode, uint32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_test_mode, int32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_test_mode, int64_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_test_mode, uint32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_pairs_test_mode, int32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_pairs_test_mode, int64_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_pairs_test_mode, uint32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_multiple_test_mode, int32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_multiple_test_mode, int64_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
BENCHMARK_TEMPLATE(sequential_multiple_test_mode, uint32_t)->RangeMultiplier(2)->Range(1<<1, 1<<22)->Complexity();
|
||||
|
||||
// NOTE(review): presumably supplies the program's main() for the benchmarks registered above -- confirm.
BENCHMARK_MAIN();
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <list>
|
||||
#include <forward_list>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
@@ -833,6 +834,69 @@ void test_interquartile_range()
|
||||
BOOST_TEST_EQ(iqr, 6);
|
||||
}
|
||||
|
||||
template<class Z>
|
||||
void test_mode()
|
||||
{
|
||||
std::vector<Z> modes;
|
||||
std::vector<Z> v {1, 2, 2, 3, 4, 5};
|
||||
const Z ref = 2;
|
||||
|
||||
// Does iterator call work?
|
||||
boost::math::statistics::mode(v.begin(), v.end(), std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does container call work?
|
||||
modes.clear();
|
||||
boost::math::statistics::mode(v, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with part of a vector?
|
||||
modes.clear();
|
||||
boost::math::statistics::mode(v.begin(), v.begin() + 3, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with const qualification? Only if pre-sorted
|
||||
modes.clear();
|
||||
boost::math::statistics::sorted_mode(v.cbegin(), v.cend(), std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with std::array?
|
||||
modes.clear();
|
||||
std::array<Z, 6> u {1, 2, 2, 3, 4, 5};
|
||||
boost::math::statistics::mode(u, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with a bi-modal distribuition?
|
||||
modes.clear();
|
||||
std::vector<Z> w {1, 2, 2, 3, 3, 4, 5};
|
||||
boost::math::statistics::mode(w.begin(), w.end(), std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(modes.size(), 2);
|
||||
|
||||
// Does it work with an empty vector?
|
||||
modes.clear();
|
||||
std::vector<Z> x {};
|
||||
boost::math::statistics::mode(x, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(modes.size(), 0);
|
||||
|
||||
// Does it work with a one item vector
|
||||
modes.clear();
|
||||
x.push_back(2);
|
||||
boost::math::statistics::mode(x, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with a doubly linked list
|
||||
modes.clear();
|
||||
std::list<Z> dl {1, 2, 2, 3, 4, 5};
|
||||
boost::math::statistics::sorted_mode(dl, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
|
||||
// Does it work with a singly linked list
|
||||
modes.clear();
|
||||
std::forward_list<Z> fl {1, 2, 2, 3, 4, 5};
|
||||
boost::math::statistics::sorted_mode(fl, std::back_inserter(modes));
|
||||
BOOST_TEST_EQ(ref, modes[0]);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_mean<float>();
|
||||
@@ -902,5 +966,11 @@ int main()
|
||||
|
||||
test_interquartile_range<double>();
|
||||
test_interquartile_range<cpp_bin_float_50>();
|
||||
|
||||
test_mode<int>();
|
||||
test_mode<int32_t>();
|
||||
test_mode<int64_t>();
|
||||
test_mode<uint32_t>();
|
||||
|
||||
return boost::report_errors();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user