From ad9731ffb2088410e4082d625a7a8b192e5692b5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 28 Mar 2021 16:41:36 +0300 Subject: [PATCH 1/3] Fix for issue #585 --- .../math/statistics/detail/single_pass.hpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/boost/math/statistics/detail/single_pass.hpp b/include/boost/math/statistics/detail/single_pass.hpp index 18fa44463..23c36ce70 100644 --- a/include/boost/math/statistics/detail/single_pass.hpp +++ b/include/boost/math/statistics/detail/single_pass.hpp @@ -21,6 +21,11 @@ #include #include +#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && !defined(_MSC_VER) +#include +#include +#endif + namespace boost { namespace math { namespace statistics { namespace detail { template @@ -241,12 +246,35 @@ ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, RandomAccessIt ReturnType num = 0; ReturnType denom = 0; + #ifndef _MSC_VER std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) { num = num + val * i; denom = denom + val; i = i + 1; }); + #elif !defined(BOOST_NO_CXX17_HDR_EXECUTION) + if constexpr (std::is_same_v, decltype(std::execution::par)>) + { + std::mutex m; + std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) + { + std::lock_guard guard(m); + num = num + val * i; + denom = denom + val; + i = i + 1; + }); + } + else // // lock_guard constructor not allowed with std::execution::par_unseq (vectorization-unsafe) + { + std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) + { + num = num + val * i; + denom = denom + val; + i = i + 1; + }); + } + #endif if(denom == 0) { From 2e0897a6b87989e0a8534c86c92500790a4f46af Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Sun, 28 Mar 2021 16:49:29 +0300 Subject: [PATCH 2/3] Fix typo --- include/boost/math/statistics/detail/single_pass.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/math/statistics/detail/single_pass.hpp b/include/boost/math/statistics/detail/single_pass.hpp index 23c36ce70..b058f9423 100644 --- a/include/boost/math/statistics/detail/single_pass.hpp +++ b/include/boost/math/statistics/detail/single_pass.hpp @@ -21,7 +21,7 @@ #include #include -#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && !defined(_MSC_VER) +#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && defined(_MSC_VER) #include #include #endif From 1924e3179c24ac4d73181587d752774ad72c0ccb Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 31 Mar 2021 22:04:35 +0300 Subject: [PATCH 3/3] Complete redo of parallel gini calculation --- .../math/statistics/detail/single_pass.hpp | 144 +++++++++++------- .../math/statistics/univariate_statistics.hpp | 27 ---- .../univariate_statistics_performance.cpp | 3 +- 3 files changed, 94 insertions(+), 80 deletions(-) diff --git a/include/boost/math/statistics/detail/single_pass.hpp b/include/boost/math/statistics/detail/single_pass.hpp index b058f9423..b705b0ea5 100644 --- a/include/boost/math/statistics/detail/single_pass.hpp +++ b/include/boost/math/statistics/detail/single_pass.hpp @@ -21,11 +21,6 @@ #include #include -#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && defined(_MSC_VER) -#include -#include -#endif - namespace boost { namespace math { namespace statistics { namespace detail { template @@ -237,53 +232,6 @@ ReturnType skewness_sequential_impl(ForwardIterator first, ForwardIterator last) return skew; } -template -ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) -{ - using Real = typename std::iterator_traits::value_type; - - ReturnType i = 1; - ReturnType num = 0; - ReturnType denom = 0; - - #ifndef _MSC_VER - std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) - { - num = num + val * i; - denom = denom + val; - i = i + 1; - }); - #elif !defined(BOOST_NO_CXX17_HDR_EXECUTION) - if constexpr (std::is_same_v, decltype(std::execution::par)>) - { - std::mutex m; - std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) - { - std::lock_guard guard(m); - num = num + val * i; - denom = denom + val; - i = i + 1; - }); - } - else // // lock_guard constructor not allowed with std::execution::par_unseq (vectorization-unsafe) - { - std::for_each(exec, first, last, [&i, &num, &denom](const Real& val) - { - num = num + val * i; - denom = denom + val; - i = i + 1; - }); - } - #endif - - if(denom == 0) - { - return ReturnType(0); - } - - return ((2*num)/denom - i)/(i-1); -} - template ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterator last) { @@ -309,6 +257,98 @@ ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterat } } +template +ReturnType gini_range_fraction(ForwardIterator first, ForwardIterator last, std::size_t starting_index) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + std::size_t i = starting_index + 1; + Real num = 0; + Real denom = 0; + + for(auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + return std::make_tuple(num, denom, i); +} + +template +ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using range_tuple = std::tuple; + + const auto elements = std::distance(first, last); + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // Threading is faster for: 10 + 10.12e-3 N/j <= 10.12e-3N => N >= 10^4j/10.12(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(10.12*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/10.12; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(elements < parallel_lower_bound) + { + return gini_coefficient_sequential_impl(first, last); + } + else if(elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(10.12*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector> future_manager; + const auto elements_per_thread = std::ceil(static_cast(elements) / num_threads); + + auto it = first; + for(std::size_t i {}; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, elements_per_thread, i]() -> range_tuple + { + return gini_range_fraction(it, std::next(it, elements_per_thread), i*elements_per_thread); + })); + it = std::next(it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, last, num_threads, elements_per_thread]() -> range_tuple + { + return gini_range_fraction(it, last, (num_threads - 1)*elements_per_thread); + })); + + ReturnType num = 0; + ReturnType denom = 0; + + for(std::size_t i = 0; i < future_manager.size(); ++i) + { + auto temp = future_manager[i].get(); + num += std::get<0>(temp); + denom += std::get<1>(temp); + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if(denom == 0) + { + return ReturnType(0); + } + else + { + return ((2*num)/denom - elements)/(elements-1); + } +} + template OutputIterator mode_impl(ForwardIterator first, ForwardIterator last, OutputIterator output) { diff --git a/include/boost/math/statistics/univariate_statistics.hpp b/include/boost/math/statistics/univariate_statistics.hpp index 17e88bc6f..08587a3b3 100644 --- a/include/boost/math/statistics/univariate_statistics.hpp +++ b/include/boost/math/statistics/univariate_statistics.hpp @@ -413,12 +413,6 @@ inline auto median(RandomAccessContainer & v) return median(std::execution::seq, std::begin(v), std::end(v)); } -#if 0 -// -// Parallel gini calculation is curently broken, see: -// https://github.com/boostorg/math/issues/585 -// We will fix this at a later date, for now just use a serial implementation: -// template inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) { @@ -451,27 +445,6 @@ inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, return detail::gini_coefficient_parallel_impl(exec, first, last); } } -#else -template -inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) -{ - using Real = typename std::iterator_traits::value_type; - - if (!std::is_sorted(exec, first, last)) - { - std::sort(exec, first, last); - } - - if constexpr (std::is_integral_v) - { - return detail::gini_coefficient_sequential_impl(first, last); - } - else - { - return detail::gini_coefficient_sequential_impl(first, last); - } -} -#endif template inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessContainer & v) diff --git a/reporting/performance/univariate_statistics_performance.cpp b/reporting/performance/univariate_statistics_performance.cpp index 2e93c84eb..1f587380c 100644 --- a/reporting/performance/univariate_statistics_performance.cpp +++ b/reporting/performance/univariate_statistics_performance.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,7 @@ std::vector generate_random_vector(std::size_t size, std::size_t seed) } return v; } - else if constexpr (boost::is_complex::value) + else if constexpr (boost::math::tools::is_complex_type::value) { std::normal_distribution dis(0, 1); for (size_t i = 0; i < v.size(); ++i)