// File: boost_unordered_benchmarks/non_unique_scattered_erasure_by_key.cpp
// (253 lines, 6.0 KiB, C++)

/* Measuring erasure times of unordered associative containers
* with duplicate elements.
*
* Copyright 2013-2022 Joaquin M Lopez Munoz.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#include <boost/core/detail/splitmix64.hpp>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <numeric>
#include <string>
/* fnv1a_hash: FNV-1a string hash whose width follows std::size_t.
 *
 * fnv1a_hash_impl<Bits> is only defined for Bits==32 and Bits==64; each
 * specialization folds the bytes of the string, one at a time, into a
 * std::size_t accumulator (xor the byte in, then multiply by the FNV prime
 * for that width). fnv1a_hash picks the specialization matching the number
 * of value bits of std::size_t.
 */

template<int Bits> struct fnv1a_hash_impl;

/* 32-bit parameters (offset basis 0x811C9DC5, prime 0x01000193). */
template<> struct fnv1a_hash_impl<32>
{
  std::size_t operator()( std::string const& s ) const
  {
    std::size_t hash = 0x811C9DC5u;

    for( char const ch: s )
    {
      /* go through unsigned char so that bytes >= 0x80 are not sign-extended */
      hash ^= static_cast<unsigned char>( ch );
      hash *= 0x01000193ul;
    }

    return hash;
  }
};

/* 64-bit parameters (offset basis 0xCBF29CE484222325, prime 0x100000001B3). */
template<> struct fnv1a_hash_impl<64>
{
  std::size_t operator()( std::string const& s ) const
  {
    std::size_t hash = 0xCBF29CE484222325ull;

    for( char const ch: s )
    {
      /* go through unsigned char so that bytes >= 0x80 are not sign-extended */
      hash ^= static_cast<unsigned char>( ch );
      hash *= 0x00000100000001B3ull;
    }

    return hash;
  }
};

/* Hash functor selecting the implementation from the width of std::size_t. */
struct fnv1a_hash: fnv1a_hash_impl< std::numeric_limits<std::size_t>::digits > {};
/* Timing state shared by measure(), pause_timing() and resume_timing():
 * measure_start is the origin of the trial being timed (shifted forward by
 * resume_timing()), measure_pause the instant recorded by pause_timing().
 */
std::chrono::high_resolution_clock::time_point measure_start,measure_pause;
/* Returns the average time, in seconds, taken by one call to f().
 *
 * Ten trials are run; each trial calls f() repeatedly until at least 200ms
 * have elapsed since measure_start and records elapsed/calls. The trials are
 * then sorted, the two lowest and two highest are dropped, and the mean of
 * the remaining six is returned.
 *
 * measure_start is a global: f() may move it (see pause_timing() and
 * resume_timing()), so it is re-read after every call instead of cached.
 */
template<typename F>
double measure(F f)
{
  using namespace std::chrono;
  typedef high_resolution_clock clock_type;

  static const int          num_trials=10;
  static const milliseconds min_time_per_trial(200);

  std::array<double,num_trials> samples;
  for(double& sample:samples){
    int                    calls=0;
    clock_type::time_point now;

    measure_start=clock_type::now();
    do{
      f();
      ++calls;
      now=clock_type::now();
    }while(now-measure_start<min_time_per_trial);

    const duration<double> elapsed=
      duration_cast<duration<double>>(now-measure_start);
    sample=elapsed.count()/calls;
  }

  /* trimmed mean: ignore the two extremes at each end */
  std::sort(samples.begin(),samples.end());
  const double trimmed_sum=
    std::accumulate(samples.begin()+2,samples.end()-2,0.0);
  return trimmed_sum/(samples.size()-4);
}
/* Records the current instant in measure_pause; resume_timing() later uses
 * it to compute how long timing was suspended.
 */
void pause_timing()
{
  typedef std::chrono::high_resolution_clock clock_type;
  measure_pause=clock_type::now();
}
/* Shifts measure_start forward by the time elapsed since the last
 * pause_timing(), so that interval is not counted in differences taken
 * against measure_start.
 */
void resume_timing()
{
  const auto suspended_for=
    std::chrono::high_resolution_clock::now()-measure_pause;
  measure_start+=suspended_for;
}
#include <boost/bind/bind.hpp>
#include <iostream>
#include <random>
#include <vector>
/* Generator of pseudo-random unsigned values drawn from n/G distinct
 * pre-images: each call takes a random number modulo n/G and then scrambles
 * it with a fixed bit-mixing step. The engine is seeded with a constant, so
 * the sequence is reproducible.
 *
 * Precondition: G!=0 and n>=G (otherwise n/G divides by, or is, zero).
 */
struct rand_seq
{
  rand_seq(unsigned int n,unsigned int G):mod(n/G),gen(34862){}

  unsigned int operator()()
  {
    const unsigned int r=dist(gen)%mod;
    /* spread the reduced value over the whole unsigned range */
    return r^(0x9e3779b9+(r<<6)+(r>>2));
  }

private:
  unsigned int                                mod;  /* number of distinct pre-images */
  std::uniform_int_distribution<unsigned int> dist;
  std::mt19937                                gen;
};
/* Pool of strings filled by init_words() and read by the benchmark code. */
static std::vector<std::string> words;
/* Builds the string "pfx_<zeros>_<x>_sfx", where <zeros> is a run of
 * (x % 8 + 1) '0' characters and <x> is x in decimal, so words vary in
 * length between 1 and 8 padding characters depending on x.
 *
 * With a 32-bit unsigned the longest result is 27 characters, which fits
 * the 64-byte buffer with room to spare.
 */
static std::string make_random_word( unsigned x )
{
  char buffer[ 64 ];

  /* the '*' field width must be passed as int, not unsigned */
  int const pad_width = static_cast<int>( x % 8 + 1 );

  std::snprintf( buffer, sizeof(buffer), "pfx_%0*d_%u_sfx", pad_width, 0, x );
  return buffer;
}
/* Replaces the contents of `words` with n strings produced by
 * make_random_word() from the output of a default-constructed splitmix64
 * generator, so every call yields the same sequence.
 */
static void init_words(unsigned n) {
  words.clear();
  words.reserve(n);

  boost::detail::splitmix64 rng;
  for (unsigned remaining = n; remaining != 0; --remaining) {
    /* only the low bits that fit in unsigned are used */
    unsigned const value = static_cast<unsigned>( rng() );
    words.push_back( make_random_word( value ) );
  }
}
/* Benchmark body: erase-by-key on a container holding duplicate elements.
 *
 * Each call builds a Container with n elements taken cyclically from the
 * first n/G entries of `words`, then erases every key in vec. Only the
 * erasure loop is timed: timing is paused during construction and filling,
 * and again before the container goes out of scope, so destruction is not
 * measured either.
 *
 * Precondition: n/G is non-zero (it is used as a modulus).
 */
template<typename Container>
struct scattered_erasure
{
  typedef void result_type; /* queried by boost::bind */

  template<typename Vector>
  void operator()(unsigned int n,float Fmax,unsigned int G,const Vector& vec)const
  {
    {
      /* untimed: set up the container */
      pause_timing();
      Container c;
      c.max_load_factor(Fmax);
      const unsigned int distinct=n/G;
      for(unsigned int k=0;k<n;++k){
        c.insert(words[k%distinct]);
      }

      /* timed: erase by key */
      resume_timing();
      for(auto it=vec.begin();it!=vec.end();++it){
        c.erase(*it);
      }
      pause_timing();
    } /* c is destroyed here, with timing still paused */
    resume_timing();
  }
};
/* Runs Tester over three container types and prints a semicolon-separated
 * table to std::cout.
 *
 * After a title line and a line with the three container names, one row is
 * printed per problem size n: n followed by the scaled per-element time of
 * each container. n starts at 10000 and grows up to 3000000 by a step that
 * starts at 500 and is itself multiplied by 1.05 after every row.
 *
 * For each n the keys handed to the tester are the first n/G entries of
 * `words`, shuffled with a fixed-seed engine so all containers see the same
 * order.
 */
template<
  template<typename> class Tester,
  typename Container1,typename Container2,typename Container3>
void test(
  const char* title,
  const char* name1,const char* name2,const char* name3,
  float Fmax,unsigned int G)
{
  const unsigned int first_n=10000,last_n=3000000;
  const double       step_growth=1.05;
  unsigned int       step=500;

  init_words(last_n);

  std::cout<<title<<", Fmax="<<Fmax<<", G="<<G<<":"<<std::endl;
  std::cout<<name1<<";"<<name2<<";"<<name3<<std::endl;

  unsigned int n=first_n;
  while(n<=last_n){
    std::vector<std::string> keys(words.begin(),words.begin()+n/G);
    std::mt19937             shuffler(73642);
    std::shuffle(keys.begin(),keys.end(),shuffler);

    /* NOTE(review): 10E6 equals 1e7, so the figures printed are
     * seconds-per-element scaled by 1e7 -- confirm the intended unit. */
    const double t1=
      measure(boost::bind(Tester<Container1>(),n,Fmax,G,boost::cref(keys)));
    std::cout<<n<<";"<<(t1/n)*10E6;

    const double t2=
      measure(boost::bind(Tester<Container2>(),n,Fmax,G,boost::cref(keys)));
    std::cout<<";"<<(t2/n)*10E6;

    const double t3=
      measure(boost::bind(Tester<Container3>(),n,Fmax,G,boost::cref(keys)));
    std::cout<<";"<<(t3/n)*10E6<<std::endl;

    n+=step;
    step=static_cast<unsigned int>(step*step_growth);
  }
}
#include <boost/unordered_set.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/identity.hpp>
#include <unordered_set>
int main()
{
using namespace boost::multi_index;
/* some stdlibs provide the discussed but finally rejected std::identity */
using boost::multi_index::identity;
typedef std::unordered_multiset<std::string, std::hash<std::string>> container_t1;
typedef boost::unordered_multiset<std::string, std::hash<std::string>> container_t2;
typedef boost::multi_index_container<
std::string,
indexed_by<
hashed_non_unique<identity<std::string>, std::hash<std::string> >
>
> container_t3;
test<
scattered_erasure,
container_t1,
container_t2,
container_t3>
(
"Scattered erasure",
"std::unordered_multiset",
"boost::unordered_multiset",
"multi_index::hashed_non_unique",
1.0,5
);
test<
scattered_erasure,
container_t1,
container_t2,
container_t3>
(
"Scattered erasure",
"std::unordered_multiset",
"boost::unordered_multiset",
"multi_index::hashed_non_unique",
5.0,5
);
}