added random number generation speedups from Martin Leitner-Ankerl

This commit is contained in:
joaquintides
2023-07-25 16:14:22 +02:00
parent 19a2888cf0
commit 5f8027d30b
2 changed files with 77 additions and 5 deletions

View File

@@ -53,6 +53,7 @@ void resume_timing()
}
#include <boost/bind/bind.hpp>
#include <boost/core/detail/splitmix64.hpp>
#include <boost/unordered/concurrent_flat_map.hpp>
#include <iostream>
#include <latch>
@@ -185,6 +186,65 @@ private:
};
/* contributed by Martin Leitner-Ankerl */
/* Lightweight replacement for std::discrete_distribution over N outcomes.
 * The weights are turned into cumulative thresholds scaled to the full
 * uint64_t range, so sampling is a short linear scan over integers with no
 * floating point work at call time.
 */
template<size_t N>
class simple_discrete_distribution {
  static_assert(N>0,"simple_discrete_distribution needs at least one outcome");

public:
  /* m_cummulative[i] is the cumulative probability of outcomes 0..i scaled
   * to [0, 2^64). All N thresholds are stored; the last one is always the
   * maximum uint64_t value so that every input maps to some outcome.
   */
  std::array<uint64_t, N> m_cummulative{};

  /* Builds the thresholds from a list of weights (expected to be
   * non-negative with a positive total). At most N weights are consumed:
   * extra entries are ignored rather than written past the end of the
   * array, and missing entries are treated as weight 0.
   */
  simple_discrete_distribution(std::initializer_list<double> l)
  {
    constexpr uint64_t max_value=(std::numeric_limits<uint64_t>::max)();

    std::array<double, N> sums{};
    double sum=0.0;
    std::size_t count=0;
    for(auto x:l){
      if(count==N)break;
      sum+=x;
      sums[count]=sum;
      ++count;
    }
    /* missing weights count as 0: keep the running sum monotonic */
    for(;count<N;++count)sums[count]=sum;

    /* normalize to 2^64; max_value rounds up to exactly 2^64 as a double */
    const double scale=static_cast<double>(max_value);
    for(std::size_t i=0;i<N;++i){
      const double scaled=sums[i]/sum*scale;
      /* converting a double outside the uint64_t range (or NaN, which is
       * what a zero total produces) is undefined behavior, so saturate
       * explicitly instead of casting
       */
      if(!(scaled<scale))m_cummulative[i]=max_value;
      else if(scaled<=0.0)m_cummulative[i]=0;
      else m_cummulative[i]=static_cast<uint64_t>(scaled);
    }
    m_cummulative.back()=max_value;
  }

  /* Maps a uniformly distributed 64-bit value to an outcome index in
   * [0, N): the first index whose threshold is >= r01.
   */
  std::size_t operator()(uint64_t r01)const noexcept
  {
    for(std::size_t i=0;i<m_cummulative.size();++i)
    {
      if(r01<=m_cummulative[i])return i;
    }
    /* not reachable: the last threshold is the maximum uint64_t value */
    return m_cummulative.size()-1;
  }

  /* Draws one value from rng and maps it to an outcome index. The
   * generator must cover the whole uint64_t range, which is checked at
   * compile time.
   */
  template<typename URNG>
  std::size_t operator()(URNG& rng)const noexcept
  {
    static_assert((URNG::min)()==0,"URNG::min must be 0");
    static_assert(
      (URNG::max)()==(std::numeric_limits<uint64_t>::max)(),
      "URNG::max must be max of uint64_t");
    return operator()(rng());
  }
};
/* Wraps boost::detail::splitmix64 with the result_type, min() and max()
 * members of a uniform random bit generator. min() is 0 and max() is the
 * largest value of result_type.
 */
struct splitmix64_urng:boost::detail::splitmix64
{
  using result_type=boost::uint64_t;

  /* reuse the constructors of the underlying generator */
  using boost::detail::splitmix64::splitmix64;

  static constexpr result_type (min)()
  {
    return (std::numeric_limits<result_type>::min)();
  }

  static constexpr result_type (max)()
  {
    return (std::numeric_limits<result_type>::max)();
  }
};
template<typename Map>
struct parallel_load
{
@@ -213,8 +273,8 @@ struct parallel_load
finder<zipfian_int_distribution<int>>
>::type;
std::discrete_distribution<> dist({10,45,45});
std::mt19937_64 gen(std::size_t(282472+i*213731));
simple_discrete_distribution<3> dist({10,45,45});
splitmix64_urng gen(std::size_t(282472+i*213731));
updater update{zipf1};
finder_type successful_find{zipf1},

View File

@@ -61,15 +61,27 @@
*/
/*
* Joaquin M Lopez Munoz, May-Jun 2023:
* Joaquin M Lopez Munoz, May-Jul 2023:
* - Trivial changes to get rid of GCC specific functions and some warnings.
* - Cached values to speed up zipfian_int_distribution::operator().
* - Replaced std::generate_canonical with faster alternative (contributed
* by Martin Leitner-Ankerl from https://prng.di.unimi.it/).
*/
#include <cmath>
#include <limits>
#include <random>
#include <cassert>
#include <cstring>
/* Maps a 64-bit random integer to a double in [0,1).
 * The top 52 bits of r become the mantissa of a double whose exponent bits
 * are 0x3ff, i.e. a value in [1,2), from which 1.0 is then subtracted
 * (technique from https://prng.di.unimi.it/).
 * Declared inline so that including this file from more than one
 * translation unit does not yield multiple definitions at link time.
 */
inline double uniform01(uint64_t r) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "uniform01 requires a 64-bit double");
  const uint64_t bits = (UINT64_C(0x3ff) << 52U) | (r >> 12U);
  // A union can't be used for type punning in C++ (undefined behavior);
  // std::memcpy is well defined and gets optimized away anyway.
  double d{};
  std::memcpy(&d, &bits, sizeof(double));
  return d - 1.0;
}
template<typename _IntType = int>
class zipfian_int_distribution
@@ -226,8 +238,8 @@ public:
template<typename _UniformRandomNumberGenerator>
result_type operator()(_UniformRandomNumberGenerator& __urng, const param_type& __p)
{
double u = std::generate_canonical<double, std::numeric_limits<double>::digits, _UniformRandomNumberGenerator>(__urng);
double u = uniform01(__urng());
double uz = u * __p.zeta();
if(uz < 1.0) return __p.a();
if(uz < __p._1_plus_05_to_theta()) return __p.a() + 1;