Merge pull request #586 from mborland/gini

Fix for issue #585
2026-01-19 04:22:09 +00:00 · 2021-04-06 18:16:23 +01:00
parent eb1686cb0b 1924e3179c
commit 858e8a66e9
3 changed files with 94 additions and 52 deletions
--- a/include/boost/math/statistics/detail/single_pass.hpp
+++ b/include/boost/math/statistics/detail/single_pass.hpp
@@ -232,30 +232,6 @@ ReturnType skewness_sequential_impl(ForwardIterator first, ForwardIterator last)
    return skew;
 }

-template<typename ReturnType, typename ExecutionPolicy, typename RandomAccessIterator>
-ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last)
-{
-    using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
-    
-    ReturnType i = 1;
-    ReturnType num = 0;
-    ReturnType denom = 0;
-    
-    std::for_each(exec, first, last, [&i, &num, &denom](const Real& val)
-    {
-        num = num + val * i;
-        denom = denom + val;
-        i = i + 1;
-    });
-
-    if(denom == 0)
-    {
-        return ReturnType(0);
-    }
-
-    return ((2*num)/denom - i)/(i-1);
-}
-
 template<typename ReturnType, typename ForwardIterator>
 ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterator last)
 {
@@ -281,6 +257,98 @@ ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterat
    }
 }

+template<typename ReturnType, typename ForwardIterator>
+ReturnType gini_range_fraction(ForwardIterator first, ForwardIterator last, std::size_t starting_index)
+{
+    using Real = typename std::tuple_element<0, ReturnType>::type;
+
+    std::size_t i = starting_index + 1;
+    Real num = 0;
+    Real denom = 0;
+
+    for(auto it = first; it != last; ++it)
+    {
+        num += *it*i;
+        denom += *it;
+        ++i;
+    }
+
+    return std::make_tuple(num, denom, i);
+}
+
+template<typename ReturnType, typename ExecutionPolicy, typename ForwardIterator>
+ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last)
+{
+    using range_tuple = std::tuple<ReturnType, ReturnType, std::size_t>;
+    
+    const auto elements = std::distance(first, last);
+    const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency();
+    unsigned num_threads = 2u;
+    
+    // Threading is faster for: 10 + 10.12e-3 N/j <= 10.12e-3N => N >= 10^4j/10.12(j-1).
+    const auto parallel_lower_bound = 10e4*max_concurrency/(10.12*(max_concurrency-1));
+    const auto parallel_upper_bound = 10e4*2/10.12; // j = 2
+
+    // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/
+    if(elements < parallel_lower_bound)
+    {
+        return gini_coefficient_sequential_impl<ReturnType>(first, last);
+    }
+    else if(elements >= parallel_upper_bound)
+    {
+        num_threads = max_concurrency;
+    }
+    else
+    {
+        for(unsigned i = 3; i < max_concurrency; ++i)
+        {
+            if(parallel_lower_bound < 10e4*i/(10.12*(i-1)))
+            {
+                num_threads = i;
+                break;
+            }
+        }
+    }
+
+    std::vector<std::future<range_tuple>> future_manager;
+    const auto elements_per_thread = std::ceil(static_cast<double>(elements) / num_threads);
+
+    auto it = first;
+    for(std::size_t i {}; i < num_threads - 1; ++i)
+    {
+        future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, elements_per_thread, i]() -> range_tuple
+        {
+            return gini_range_fraction<range_tuple>(it, std::next(it, elements_per_thread), i*elements_per_thread);
+        }));
+        it = std::next(it, elements_per_thread);
+    }
+
+    future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, last, num_threads, elements_per_thread]() -> range_tuple
+    {
+        return gini_range_fraction<range_tuple>(it, last, (num_threads - 1)*elements_per_thread);
+    }));
+
+    ReturnType num = 0;
+    ReturnType denom = 0;
+
+    for(std::size_t i = 0; i < future_manager.size(); ++i)
+    {
+        auto temp = future_manager[i].get();
+        num += std::get<0>(temp);
+        denom += std::get<1>(temp);
+    }
+
+    // If the l1 norm is zero, all elements are zero, so every element is the same.
+    if(denom == 0)
+    {
+        return ReturnType(0);
+    }
+    else
+    {
+        return ((2*num)/denom - elements)/(elements-1);
+    }
+}
+
 template<typename ForwardIterator, typename OutputIterator>
 OutputIterator mode_impl(ForwardIterator first, ForwardIterator last, OutputIterator output)
 {
--- a/include/boost/math/statistics/univariate_statistics.hpp
+++ b/include/boost/math/statistics/univariate_statistics.hpp
@@ -413,12 +413,6 @@ inline auto median(RandomAccessContainer & v)
    return median(std::execution::seq, std::begin(v), std::end(v));
 }

-#if 0
-//
-// Parallel gini calculation is curently broken, see:
-// https://github.com/boostorg/math/issues/585
-// We will fix this at a later date, for now just use a serial implementation:
-//
 template<class ExecutionPolicy, class RandomAccessIterator>
 inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last)
 {
@@ -451,27 +445,6 @@ inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first,
        return detail::gini_coefficient_parallel_impl<Real>(exec, first, last);
    }
 }
-#else
-template<class ExecutionPolicy, class RandomAccessIterator>
-inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last)
-{
-   using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
-
-   if (!std::is_sorted(exec, first, last))
-   {
-      std::sort(exec, first, last);
-   }
-
-   if constexpr (std::is_integral_v<Real>)
-   {
-      return detail::gini_coefficient_sequential_impl<double>(first, last);
-   }
-   else
-   {
-      return detail::gini_coefficient_sequential_impl<Real>(first, last);
-   }
-}
-#endif

 template<class ExecutionPolicy, class RandomAccessContainer>
 inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessContainer & v)
--- a/reporting/performance/univariate_statistics_performance.cpp
+++ b/reporting/performance/univariate_statistics_performance.cpp
@@ -8,6 +8,7 @@
 #include <boost/multiprecision/cpp_bin_float.hpp>
 #include <boost/math/statistics/univariate_statistics.hpp>
 #include <boost/math/tools/assert.hpp>
+#include <boost/math/tools/complex.hpp>
 #include <benchmark/benchmark.h>
 #include <vector>
 #include <algorithm>
@@ -47,7 +48,7 @@ std::vector<T> generate_random_vector(std::size_t size, std::size_t seed)
        }
        return v;
    }
-    else if constexpr (boost::is_complex<T>::value)
+    else if constexpr (boost::math::tools::is_complex_type<T>::value)
    {
        std::normal_distribution<typename T::value_type> dis(0, 1);
        for (size_t i = 0; i < v.size(); ++i)