// Copyright 2021, 2022 Peter Dimov. // Copyright 2022-2023 Joaquin M Lopez Munoz. // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt #define _SILENCE_CXX17_OLD_ALLOCATOR_MEMBERS_DEPRECATION_WARNING #define _SILENCE_CXX20_CISO646_REMOVED_WARNING #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rw_spinlock.hpp" #include "cfoa.hpp" #include "cuckoohash_map.hh" #include "oneapi/tbb/concurrent_hash_map.h" #include "oneapi/tbb/spin_rw_mutex.h" #include "gtl/phmap.hpp" #ifdef NUM_THREADS int const Th = NUM_THREADS; #else int const Th = 16; #endif int const Sh = 512; // number of shards using namespace std::chrono_literals; static void print_time( std::chrono::steady_clock::time_point & t1, char const* label, std::size_t s, std::size_t size ) { auto t2 = std::chrono::steady_clock::now(); std::cout << label << ": " << ( t2 - t1 ) / 1ms << " ms (s=" << s << ", size=" << size << ")\n"; t1 = t2; } static std::vector words; static void init_words() { #if SIZE_MAX > UINT32_MAX char const* fn = "enwik9"; // http://mattmahoney.net/dc/textdata #else char const* fn = "enwik8"; // ditto #endif auto t1 = std::chrono::steady_clock::now(); std::ifstream is( fn ); std::string in( std::istreambuf_iterator( is ), std::istreambuf_iterator{} ); boost::regex re( "[a-zA-Z]+"); boost::sregex_token_iterator it( in.begin(), in.end(), re, 0 ), end; words.assign( it, end ); auto t2 = std::chrono::steady_clock::now(); std::cout << fn << ": " << words.size() << " words, " << ( t2 - t1 ) / 1ms << " ms\n\n"; } // template struct map_policy { using key_type = Key; using raw_key_type = typename std::remove_const::type; using raw_mapped_type = typename std::remove_const::type; using init_type = std::pair; using moved_type = std::pair; using value_type = std::pair; using element_type = value_type; static value_type& value_from(element_type& x) { return x; } template static raw_key_type const& extract(std::pair const& kv) { return kv.first; } static moved_type move(init_type& x) { return {std::move(x.first), std::move(x.second)}; } static moved_type move(element_type& x) { // TODO: we probably need to launder here return {std::move(const_cast(x.first)), std::move(const_cast(x.second))}; } template static void construct(A& al, init_type* p, Args&&... args) { boost::allocator_construct(al, p, std::forward(args)...); } template static void construct(A& al, value_type* p, Args&&... args) { boost::allocator_construct(al, p, std::forward(args)...); } template static void destroy(A& al, init_type* p) noexcept { boost::allocator_destroy(al, p); } template static void destroy(A& al, value_type* p) noexcept { boost::allocator_destroy(al, p); } }; // map types using ufm_map_type = boost::unordered_flat_map; using cfoa_map_type = boost::unordered::detail::cfoa::table, boost::hash, std::equal_to, std::allocator>>; using cfoa_tbb_map_type = boost::unordered::detail::cfoa::table, boost::hash, std::equal_to, std::allocator>, tbb::spin_rw_mutex>; using cfoa_shm_map_type = boost::unordered::detail::cfoa::table, boost::hash, std::equal_to, std::allocator>, std::shared_mutex>; using fct_map_type = boost::unordered::detail::foa::concurrent_table, boost::hash, std::equal_to, std::allocator>>; using concurrent_flat_map_type = boost::unordered::concurrent_flat_map; using cuckoo_map_type = libcuckoo::cuckoohash_map, std::equal_to, std::allocator>>; struct tbb_hash_compare { std::size_t hash( std::string_view const& x ) const { return boost::hash()( x ); } bool equal( std::string_view const& x, std::string_view const& y ) const { return x == y; } }; using tbb_map_type = tbb::concurrent_hash_map; template using gtl_map_type = gtl::parallel_flat_hash_map, std::equal_to, std::allocator>, 9, Mutex>; // map operations inline void increment_element( ufm_map_type& map, std::string_view key ) { ++map[ key ]; } inline bool contains_element( ufm_map_type const& map, std::string_view key ) { return map.contains( key ); } inline void increment_element( cfoa_map_type& map, std::string_view key ) { map.try_emplace( []( auto& x, bool ){ ++x.second; }, key, 0 ); } inline bool contains_element( cfoa_map_type const& map, std::string_view key ) { return map.find( key, [&]( auto& ){} ); } inline void increment_element( cfoa_tbb_map_type& map, std::string_view key ) { map.try_emplace( []( auto& x, bool ){ ++x.second; }, key, 0 ); } inline bool contains_element( cfoa_tbb_map_type const& map, std::string_view key ) { return map.find( key, [&]( auto& ){} ); } inline void increment_element( cfoa_shm_map_type& map, std::string_view key ) { map.try_emplace( []( auto& x, bool ){ ++x.second; }, key, 0 ); } inline bool contains_element( cfoa_shm_map_type const& map, std::string_view key ) { return map.find( key, [&]( auto& ){} ); } inline void increment_element( fct_map_type& map, std::string_view key ) { map.try_emplace_or_visit( key, 0, []( auto& x ){ ++x.second; } ); } inline bool contains_element( fct_map_type const& map, std::string_view key ) { return map.contains( key ); } inline void increment_element( concurrent_flat_map_type& map, std::string_view key ) { map.try_emplace_or_visit( key, 0, []( auto& x ){ ++x.second; } ); } inline bool contains_element( concurrent_flat_map_type const& map, std::string_view key ) { return map.contains( key ); } inline void increment_element( cuckoo_map_type& map, std::string_view key ) { map.uprase_fn( key, []( auto& x){ ++x; return false; }, 0 ); } inline bool contains_element( cuckoo_map_type const& map, std::string_view key ) { return map.contains( key ); } inline void increment_element( tbb_map_type& map, std::string_view key ) { tbb_map_type::accessor acc; map.emplace( acc, key, 0 ); ++acc->second; } inline bool contains_element( tbb_map_type const& map, std::string_view key ) { return map.count( key ) != 0; } template inline void increment_element( gtl_map_type& map, std::string_view key ) { map.lazy_emplace_l( key, []( auto& x ){ ++x.second; }, [&]( auto const& ctor ){ ctor(key, 0); }); } template inline bool contains_element( gtl_map_type const& map, std::string_view key ) { return map.contains( key ); } // struct null_mutex { void lock() {} void unlock() {} void lock_shared() {} void unlock_shared() {} }; template class shared_lock { private: Mutex& mx_; public: shared_lock( Mutex& mx ): mx_( mx ) { mx_.lock_shared(); } ~shared_lock() { mx_.unlock_shared(); } }; template<> class shared_lock { private: using Mutex = std::mutex; Mutex& mx_; public: shared_lock( Mutex& mx ): mx_( mx ) { mx_.lock(); } ~shared_lock() { mx_.unlock(); } }; // template struct single_threaded { Map map; Mutex mtx; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::size_t s = 0; for( auto const& word: words ) { std::lock_guard lock( mtx ); increment_element( map, word ); ++s; } print_time( t1, "Word count", s, map.size() ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::size_t s = 0; for( auto const& word: words ) { std::string_view w2( word ); w2.remove_prefix( 1 ); ::shared_lock lock( mtx ); s += contains_element( map, w2 ); } print_time( t1, "Contains", s, map.size() ); std::cout << std::endl; } }; // template struct ufm_locked { alignas(64) boost::unordered_flat_map map; alignas(64) Mutex mtx; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { std::lock_guard lock( mtx ); ++map[ words[j] ]; ++s2; } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } print_time( t1, "Word count", s, map.size() ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { ::shared_lock lock(mtx); std::string_view w2( words[j] ); w2.remove_prefix( 1 ); s2 += map.contains( w2 ); } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } print_time( t1, "Contains", s, map.size() ); std::cout << std::endl; } }; template struct sync_map { alignas(64) boost::unordered_flat_map map; alignas(64) Mutex mtx; }; template struct ufm_sharded { sync_map sync[ Sh ]; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { auto const& word = words[ j ]; std::size_t hash = boost::hash()( word ); std::size_t shard = hash % Sh; std::lock_guard lock( sync[ shard ].mtx ); ++sync[ shard ].map[ word ]; ++s2; } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Sh; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Word count", s, n ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { std::string_view w2( words[j] ); w2.remove_prefix( 1 ); std::size_t hash = boost::hash()( w2 ); std::size_t shard = hash % Sh; std::lock_guard lock( sync[ shard ].mtx ); s2 += sync[ shard ].map.contains( w2 ); } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Sh; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Contains", s, n ); std::cout << std::endl; } }; // struct prehashed { std::string_view x; std::size_t h; explicit prehashed( std::string_view x_ ): x( x_ ), h( boost::hash()( x_ ) ) { } operator std::string_view () const { return x; } friend bool operator==( prehashed const& x, prehashed const& y ) { return x.x == y.x; } friend bool operator==( prehashed const& x, std::string_view y ) { return x.x == y; } friend bool operator==( std::string_view x, prehashed const& y ) { return x == y.x; } }; template<> struct boost::hash< prehashed > { using is_transparent = void; std::size_t operator()( prehashed const& x ) const { return x.h; } std::size_t operator()( std::string_view x ) const { return boost::hash()( x ); } }; template struct sync_map_prehashed { alignas(64) boost::unordered_flat_map< std::string_view, std::size_t, boost::hash, std::equal_to<> > map; alignas(64) Mutex mtx; }; template struct ufm_sharded_prehashed { sync_map_prehashed sync[ Sh ]; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { std::string_view word = words[ j ]; prehashed x( word ); std::size_t shard = x.h % Sh; std::lock_guard lock( sync[ shard ].mtx ); ++sync[ shard ].map[ x ]; ++s2; } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Sh; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Word count", s, n ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { std::string_view w2( words[j] ); w2.remove_prefix( 1 ); prehashed x( w2 ); std::size_t shard = x.h % Sh; ::shared_lock lock( sync[ shard ].mtx ); s2 += sync[ shard ].map.contains( x ); } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Sh; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Contains", s, n ); std::cout << std::endl; } }; // struct ufm_sharded_isolated { struct { alignas(64) boost::unordered_flat_map map; } sync[ Th ]; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, &s]{ std::size_t s2 = 0; for( std::size_t j = 0; j < words.size(); ++j ) { auto const& word = words[ j ]; std::size_t hash = boost::hash()( word ); std::size_t shard = hash % Th; if( shard == i ) { ++sync[ i ].map[ word ]; ++s2; } } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Th; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Word count", s, n ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, &s]{ std::size_t s2 = 0; for( std::size_t j = 0; j < words.size(); ++j ) { std::string_view w2( words[j] ); w2.remove_prefix( 1 ); std::size_t hash = boost::hash()( w2 ); std::size_t shard = hash % Th; if( shard == i ) { s2 += sync[ i ].map.contains( w2 ); } } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Th; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Contains", s, n ); std::cout << std::endl; } }; struct ufm_sharded_isolated_prehashed { struct { alignas(64) boost::unordered_flat_map, std::equal_to<>> map; } sync[ Th ]; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, &s]{ std::size_t s2 = 0; for( std::size_t j = 0; j < words.size(); ++j ) { std::string_view word = words[ j ]; prehashed x( word ); std::size_t shard = x.h % Th; if( shard == i ) { ++sync[ i ].map[ x ]; ++s2; } } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Th; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Word count", s, n ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, &s]{ std::size_t s2 = 0; for( std::size_t j = 0; j < words.size(); ++j ) { std::string_view w2( words[j] ); w2.remove_prefix( 1 ); prehashed x( w2 ); std::size_t shard = x.h % Th; if( shard == i ) { s2 += sync[ i ].map.contains( x ); } } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } std::size_t n = 0; for( std::size_t i = 0; i < Th; ++i ) { n += sync[ i ].map.size(); } print_time( t1, "Contains", s, n ); std::cout << std::endl; } }; template struct parallel { Map map; BOOST_NOINLINE void test_word_count( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { increment_element( map, words[j] ); ++s2; } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } print_time( t1, "Word count", s, map.size() ); std::cout << std::endl; } BOOST_NOINLINE void test_contains( std::chrono::steady_clock::time_point & t1 ) { std::atomic s = 0; std::thread th[ Th ]; std::size_t m = words.size() / Th; for( std::size_t i = 0; i < Th; ++i ) { th[ i ] = std::thread( [this, i, m, &s]{ std::size_t s2 = 0; std::size_t start = i * m; std::size_t end = i == Th-1? words.size(): (i + 1) * m; for( std::size_t j = start; j < end; ++j ) { std::string_view w2( words[j] ); w2.remove_prefix( 1 ); s2 += contains_element( map, w2 ); } s += s2; }); } for( std::size_t i = 0; i < Th; ++i ) { th[ i ].join(); } print_time( t1, "Contains", s, map.size() ); std::cout << std::endl; } }; // struct record { std::string label_; long long time_; }; static std::vector times; template BOOST_NOINLINE void test( char const* label ) { std::cout << label << ":\n\n"; Map map; auto t0 = std::chrono::steady_clock::now(); auto t1 = t0; record rec = { label, 0 }; map.test_word_count( t1 ); map.test_contains( t1 ); auto tN = std::chrono::steady_clock::now(); std::cout << "Total: " << ( tN - t0 ) / 1ms << " ms\n\n"; rec.time_ = ( tN - t0 ) / 1ms; times.push_back( rec ); } // int main() { init_words(); test>( "boost::unordered_flat_map, single threaded" ); // test>( "boost::unordered_flat_map, single threaded, mutex" ); test>( "boost::unordered_flat_map, single threaded, shared_mutex" ); test>( "boost::unordered_flat_map, single threaded, rw_spinlock" ); test>( "cfoa, single threaded" ); test>( "cfoa, tbb::spin_rw_mutex, single threaded" ); test>( "cfoa, std::shared_mutex, single threaded" ); test>( "foa::concurrent_table, single threaded" ); test>( "boost::concurrent_flat_map, single threaded" ); // test>( "libcuckoo::cuckoohash_map, single threaded" ); test>( "tbb::concurrent_hash_map, single threaded" ); // test>>( "gtl::parallel_flat_hash_map, single threaded" ); // test>( "boost::unordered_flat_map, locked" ); // test>( "boost::unordered_flat_map, locked" ); // test>( "boost::unordered_flat_map, locked" ); // test>( "boost::unordered_flat_map, sharded" ); test>( "boost::unordered_flat_map, sharded_prehashed" ); // test>("boost::unordered_flat_map, sharded"); test>( "boost::unordered_flat_map, sharded_prehashed" ); // test>( "boost::unordered_flat_map, sharded" ); test>( "boost::unordered_flat_map, sharded_prehashed" ); // test( "boost::unordered_flat_map, sharded isolated" ); test( "boost::unordered_flat_map, sharded isolated, prehashed" ); test>( "cfoa" ); test>( "cfoa, tbb::spin_rw_mutex" ); test>( "cfoa, std::shared_mutex" ); test>( "foa::concurrent_table" ); test>( "boost::concurrent_flat_map" ); // test>( "libcuckoo::cuckoohash_map" ); test>( "tbb::concurrent_hash_map" ); test>>( "gtl::parallel_flat_hash_map" ); test>>( "gtl::parallel_flat_hash_map" ); test>>( "gtl::parallel_flat_hash_map" ); std::cout << "---\n\n"; std::cout << "Number of threads: " << Th << "\n"; for( auto const& x: times ) { std::cout << std::setw( 60 ) << ( x.label_ + ": " ) << std::setw( 5 ) << x.time_ << " ms\n"; } }