diff --git a/include/boost/math/statistics/detail/single_pass.hpp b/include/boost/math/statistics/detail/single_pass.hpp index 18fa44463..b705b0ea5 100644 --- a/include/boost/math/statistics/detail/single_pass.hpp +++ b/include/boost/math/statistics/detail/single_pass.hpp @@ -232,30 +232,6 @@ ReturnType skewness_sequential_impl(ForwardIterator first, ForwardIterator last) return skew; } -template -ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) -{ - using Real = typename std::iterator_traits::value_type; - - ReturnType i = 1; - ReturnType num = 0; - ReturnType denom = 0; - - std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) - { - num = num + val * i; - denom = denom + val; - i = i + 1; - }); - - if(denom == 0) - { - return ReturnType(0); - } - - return ((2*num)/denom - i)/(i-1); -} - template ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterator last) { @@ -281,6 +257,98 @@ ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterat } } +template +ReturnType gini_range_fraction(ForwardIterator first, ForwardIterator last, std::size_t starting_index) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + std::size_t i = starting_index + 1; + Real num = 0; + Real denom = 0; + + for(auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + return std::make_tuple(num, denom, i); +} + +template +ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using range_tuple = std::tuple; + + const auto elements = std::distance(first, last); + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // Threading is faster for: 10 + 10.12e-3 N/j <= 10.12e-3N => N >= 10^4j/10.12(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(10.12*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/10.12; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(elements < parallel_lower_bound) + { + return gini_coefficient_sequential_impl(first, last); + } + else if(elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(10.12*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector> future_manager; + const auto elements_per_thread = std::ceil(static_cast(elements) / num_threads); + + auto it = first; + for(std::size_t i {}; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, elements_per_thread, i]() -> range_tuple + { + return gini_range_fraction(it, std::next(it, elements_per_thread), i*elements_per_thread); + })); + it = std::next(it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, last, num_threads, elements_per_thread]() -> range_tuple + { + return gini_range_fraction(it, last, (num_threads - 1)*elements_per_thread); + })); + + ReturnType num = 0; + ReturnType denom = 0; + + for(std::size_t i = 0; i < future_manager.size(); ++i) + { + auto temp = future_manager[i].get(); + num += std::get<0>(temp); + denom += std::get<1>(temp); + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if(denom == 0) + { + return ReturnType(0); + } + else + { + return ((2*num)/denom - elements)/(elements-1); + } +} + template OutputIterator mode_impl(ForwardIterator first, ForwardIterator last, OutputIterator output) { diff --git a/include/boost/math/statistics/univariate_statistics.hpp b/include/boost/math/statistics/univariate_statistics.hpp index 17e88bc6f..08587a3b3 100644 --- a/include/boost/math/statistics/univariate_statistics.hpp +++ b/include/boost/math/statistics/univariate_statistics.hpp @@ -413,12 +413,6 @@ inline auto median(RandomAccessContainer & v) return median(std::execution::seq, std::begin(v), std::end(v)); } -#if 0 -// -// Parallel gini calculation is curently broken, see: -// https://github.com/boostorg/math/issues/585 -// We will fix this at a later date, for now just use a serial implementation: -// template inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { @@ -451,27 +445,6 @@ inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, return detail::gini_coefficient_parallel_impl(exec, first, last); } } -#else -template -inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) -{ - using Real = typename std::iterator_traits::value_type; - - if (!std::is_sorted(exec, first, last)) - { - std::sort(exec, first, last); - } - - if constexpr (std::is_integral_v) - { - return detail::gini_coefficient_sequential_impl(first, last); - } - else - { - return detail::gini_coefficient_sequential_impl(first, last); - } -} -#endif template inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessContainer & v) diff --git a/reporting/performance/univariate_statistics_performance.cpp b/reporting/performance/univariate_statistics_performance.cpp index 2e93c84eb..1f587380c 100644 --- a/reporting/performance/univariate_statistics_performance.cpp +++ b/reporting/performance/univariate_statistics_performance.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,7 @@ std::vector generate_random_vector(std::size_t size, std::size_t seed) } return v; } - else if constexpr (boost::is_complex::value) + else if constexpr (boost::math::tools::is_complex_type::value) { std::normal_distribution dis(0, 1); for (size_t i = 0; i < v.size(); ++i)