added random number generation speedups from Martin Leitner-Ankerl

2026-01-19 04:02:13 +00:00 · 2023-07-25 16:14:22 +02:00
parent 19a2888cf0
commit 5f8027d30b
2 changed files with 77 additions and 5 deletions
--- a/parallel_load.cpp
+++ b/parallel_load.cpp
@@ -53,6 +53,7 @@ void resume_timing()
 }

 #include <boost/bind/bind.hpp>
+#include <boost/core/detail/splitmix64.hpp>
 #include <boost/unordered/concurrent_flat_map.hpp>
 #include <iostream>
 #include <latch>
@@ -185,6 +186,65 @@ private:

 };

+/* contributed by Martin Leitner-Ankerl */
+
+/* Discrete distribution over the indices 0..N-1, sampled from a single
+ * 64-bit random word by comparing it against precomputed cumulative
+ * thresholds (no floating point work at sampling time).
+ */
+template<size_t N>
+class simple_discrete_distribution {
+  static_assert(N>0,"simple_discrete_distribution needs at least one outcome");
+
+public:
+  /* m_cummulative[i] is the cumulative probability of outcomes 0..i scaled
+   * to the uint64_t range; the last entry is always the uint64_t maximum.
+   */
+  std::array<uint64_t, N> m_cummulative{};
+
+  /* Builds the thresholds from the weights in l (expected to be
+   * non-negative). Only the first N weights are used; if fewer than N are
+   * given, the missing ones count as zero.
+   */
+  simple_discrete_distribution(std::initializer_list<double> l)
+  {
+    std::array<double, N> sums{};
+    double                sum=0.0;
+
+    /* running totals, never writing past the end of sums */
+    std::size_t n=0;
+    for(auto it=l.begin();it!=l.end()&&n<N;++it,++n){
+      sum+=*it;
+      sums[n]=sum;
+    }
+    for(;n<N;++n)sums[n]=sum;
+
+    /* normalize to 2^64; a value that does not fit in uint64_t (ratio 1.0,
+     * or NaN when the weights add up to zero) saturates instead of being
+     * converted, which would be undefined behavior */
+    constexpr uint64_t max_word=(std::numeric_limits<uint64_t>::max)();
+    constexpr double   two_pow_64=18446744073709551616.0;
+    for(std::size_t i=0;i<N;++i){
+      const double scaled=sums[i]/sum*two_pow_64;
+      m_cummulative[i]=
+        scaled<two_pow_64?static_cast<uint64_t>(scaled):max_word;
+    }
+    m_cummulative.back()=max_word;
+  }
+
+  /* Returns the first index whose threshold is >= r01. */
+  std::size_t operator()(uint64_t r01)const noexcept
+  {
+    for(std::size_t i=0;i<N;++i){
+      if(r01<=m_cummulative[i])return i;
+    }
+    /* not reached while the last threshold is the uint64_t maximum */
+    return N-1;
+  }
+
+  /* Draws one 64-bit word from rng and maps it to an index. The generator
+   * must produce values over the whole uint64_t range, which is checked at
+   * compile time.
+   */
+  template<typename URNG>
+  std::size_t operator()(URNG& rng)const noexcept
+  {
+    static_assert((URNG::min)()==0,"URNG::min must be 0");
+    static_assert(
+      (URNG::max)()==(std::numeric_limits<uint64_t>::max)(),
+      "URNG::max must be max of uint64_t");
+    return operator()(rng());
+  }
+};
+
+/* Wraps boost::detail::splitmix64 with the members that
+ * simple_discrete_distribution checks on its generator: a result_type and
+ * static constexpr min()/max() spanning the whole 64-bit range.
+ */
+struct splitmix64_urng:boost::detail::splitmix64
+{
+  using result_type=boost::uint64_t;
+
+  /* make the constructors of the underlying generator available */
+  using boost::detail::splitmix64::splitmix64;
+
+  static constexpr result_type (min)(){return result_type(0);}
+  static constexpr result_type (max)(){return ~result_type(0);}
+};
+
 template<typename Map>
 struct parallel_load
 {
@@ -213,8 +273,8 @@ struct parallel_load
          finder<zipfian_int_distribution<int>>
        >::type;

-        std::discrete_distribution<>  dist({10,45,45});
-        std::mt19937_64               gen(std::size_t(282472+i*213731));
+        simple_discrete_distribution<3> dist({10,45,45});
+        splitmix64_urng                 gen(std::size_t(282472+i*213731));

        updater     update{zipf1};
        finder_type successful_find{zipf1},
--- a/zipfian_int_distribution.h
+++ b/zipfian_int_distribution.h
@@ -61,15 +61,27 @@
 */

 /* 
- * Joaquin M Lopez Munoz, May-Jun 2023:
+ * Joaquin M Lopez Munoz, May-Jul 2023:
 *   - Trivial changes to get rid of GCC specific functions and some warnings.
 *   - Cached values to speed up zipfian_int_distribution::operator().
+ *   - Replaced std::generate_canonical with faster alternative (contributed
+ *     by Martin Leitner-Ankerl from https://prng.di.unimi.it/).
 */

 #include <cmath>
 #include <limits>
 #include <random>
 #include <cassert>
+#include <cstring>
+
+/* Converts 64 random bits into a double in [0,1).
+ *
+ * The upper 52 bits of r are used as the mantissa of a double whose sign
+ * and exponent bits are 0x3ff, i.e. a value in [1,2) under IEEE-754
+ * binary64; subtracting 1.0 shifts it to [0,1). The lower 12 bits of r
+ * are discarded.
+ *
+ * Declared inline because this function is defined in a header and would
+ * otherwise be multiply defined when the header is included from several
+ * translation units.
+ */
+inline double uniform01(uint64_t r) noexcept {
+  static_assert(sizeof(double) == sizeof(uint64_t),
+                "uniform01 requires a 64-bit double");
+  const uint64_t bits = (uint64_t{0x3ff} << 52U) | (r >> 12U);
+  // A union can't be used for type punning in C++ (undefined behavior);
+  // std::memcpy is well defined and compilers optimize it anyway.
+  double d{};
+  std::memcpy(&d, &bits, sizeof(double));
+  return d - 1.0;
+}

 template<typename _IntType = int>
 class zipfian_int_distribution
@@ -226,8 +238,8 @@ public:
  template<typename _UniformRandomNumberGenerator>
  result_type operator()(_UniformRandomNumberGenerator& __urng, const param_type& __p)
  {
-    double u = std::generate_canonical<double, std::numeric_limits<double>::digits, _UniformRandomNumberGenerator>(__urng);
-      
+    double u = uniform01(__urng());
+
    double uz = u * __p.zeta();
    if(uz < 1.0) return __p.a();
    if(uz < __p._1_plus_05_to_theta()) return __p.a() + 1;