mirror of
https://github.com/boostorg/bloom.git
synced 2026-01-19 04:02:11 +00:00
* removed superfluous inline (Alexander Grund) * made hasher equivalence a precondition for &=/|= (Andrzej Krzemienski) * documented exception safety guarantees (Andrzej Krzemienski) * mentioned Bloom filters are called so after Burton H Bloom (Dmitry Arkhipov) * added warning about OOM for very small FPR (Ivan Matek) * stressed config chart x axis is capacity/num elements rather than plain capacity (Ivan Matek) * s/[SIMD] is available/is enabled at compile time (Ivan Matek) * shut down clang-tidy warnings (Ivan Matek) * used "set union" for more clarity (Andrzej Krzemienski) * stressed early on that boost::bloom::filter is _not_ a container (Claudio DeSouza) * added bulk operations to roadmap (Dmitry Arkhipov) * added try_insert to roadmap (Konstantin Savvidy) * added estimated_size to roadmap (Konstantin Savvidy) * added alternative filters to roadmap (Konstantin Savvidy) * used <cstdint> instead of <boost/cstdint.hpp> (Rubén Pérez) * mentioned endianness when serializing filters (Rubén Pérez) * corrected sloppiness about optimum k determination (Tomer Vromen) * added run-time specification of k to roadmap (Tomer Vromen) * added test/CMakeLists.txt (Rubén Pérez) * added CMake-based testing to GHA (Rubén Pérez) (#8) * added <boost/bloom.hpp> (Rubén Pérez) * added Codecov reporting (Rubén Pérez) (#9) * moved from boost::unordered::hash_is_avalanching to ContainerHash's boost::hash_is_avalanching (Ivan Matek/Peter Dimov) * added syntax highlighting to code snippets (Rubén Pérez) * avoided C-style casts in examples (Rubén Pérez) * added acknowledgements section (Peter Turcan) * added Getting Started section (Peter Turcan) * fixed example Jamfile and added example building to CI (Rubén Pérez) (#10) * added diagram about overlapping vs. non-overlapping subarrays (Rubén Pérez/Ivan Matek/Vinnie Falco) * made first code snippet self-contained (Rubén Pérez/Peter Turcan) * added more comments to genome.cpp (Rubén Pérez) * added support for arrays as blocks (Tomer Vromen) (#24) * removed emplace (Seth Heeren/Peter Dimov) (#25) * required the allocator to be of unsigned char (Seth Heeren/Peter Dimov) (#26) * added compile-time validation of Block types (Rubén Pérez) (#27) * added value type to displayed filter names in tables (Tomer Vromen) (#28) * used -march=native rather than -mavx2 (Ivan Matek) * adopted hash strategy with fastrange plus a separate MCG (Kostas Savvidis/Peter Dimov) (#30) * several maintenance commits
118 lines
3.0 KiB
C++
118 lines
3.0 KiB
C++
/* For a given filter type, outputs FPR vs. c = m/n with optimum k.
|
|
*
|
|
* Copyright 2025 Joaquin M Lopez Munoz.
|
|
* Distributed under the Boost Software License, Version 1.0.
|
|
* (See accompanying file LICENSE_1_0.txt or copy at
|
|
* http://www.boost.org/LICENSE_1_0.txt)
|
|
*
|
|
* See https://www.boost.org/libs/bloom for library home page.
|
|
*/
|
|
|
|
#include <boost/algorithm/string/replace.hpp>
|
|
#include <boost/bloom/filter.hpp>
|
|
#include <boost/bloom/block.hpp>
|
|
#include <boost/bloom/multiblock.hpp>
|
|
#include <boost/core/detail/splitmix64.hpp>
|
|
#include <boost/mp11.hpp>
|
|
#include <boost/type_index.hpp>
|
|
#include <boost/unordered/unordered_flat_set.hpp>
|
|
#include <cstdlib>
|
|
#include <functional>
|
|
#include <iostream>
|
|
#include <vector>
|
|
|
|
template<typename Filter>
|
|
double fpr(std::size_t c)
|
|
{
|
|
using value_type=typename Filter::value_type;
|
|
|
|
std::size_t num_elements=(std::size_t)(1000/Filter::fpr_for(1,c));
|
|
std::vector<value_type> data_in,data_out;
|
|
{
|
|
boost::detail::splitmix64 rng;
|
|
boost::unordered_flat_set<value_type> unique;
|
|
for(std::size_t i=0;i<num_elements;++i){
|
|
for(;;){
|
|
auto x=value_type(rng());
|
|
if(unique.insert(x).second){
|
|
data_in.push_back(x);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
for(std::size_t i=0;i<num_elements;++i){
|
|
for(;;){
|
|
auto x=value_type(rng());
|
|
if(!unique.contains(x)){
|
|
data_out.push_back(x);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
double fpr=0.0;
|
|
{
|
|
std::size_t res=0;
|
|
Filter f(c*num_elements);
|
|
for(const auto& x:data_in)f.insert(x);
|
|
for(const auto& x:data_out)res+=f.may_contain(x);
|
|
fpr=(double)res/num_elements;
|
|
}
|
|
return fpr;
|
|
}
|
|
|
|
using namespace boost::bloom;
|
|
|
|
/* change this definition to test another filter type */
|
|
template<std::size_t K>
|
|
using filter=boost::bloom::filter<int,1,multiblock<std::uint32_t,K>,1>;
|
|
|
|
/* change this to your desired c range */
|
|
std::size_t c_min=4,
|
|
c_max=24;
|
|
|
|
/* you may need to change this if optimum k >= k_max */
|
|
constexpr std::size_t k_max=20;
|
|
|
|
using fpr_function=std::function<double(std::size_t)>;
|
|
static std::vector<fpr_function> fprs=[]
|
|
{
|
|
std::vector<fpr_function> fprs;
|
|
using ks=boost::mp11::mp_iota_c<k_max,1>;
|
|
boost::mp11::mp_for_each<ks>([&](auto K){
|
|
fprs.emplace_back(fpr< ::filter<K> >);
|
|
});
|
|
return fprs;
|
|
}();
|
|
|
|
int main()
|
|
{
|
|
std::string filter_name=
|
|
boost::typeindex::type_id< ::filter<666> >().pretty_name();
|
|
boost::replace_all(filter_name,"boost::bloom::","");
|
|
boost::replace_all(filter_name,"class ","");
|
|
boost::replace_all(filter_name,"struct ","");
|
|
boost::replace_all(filter_name,"666","K");
|
|
|
|
std::cout
|
|
<<filter_name<<"\n"
|
|
<<"c;fpr;k\n";
|
|
|
|
std::size_t ik=0; /* k-1 */
|
|
for(std::size_t c=c_min;c<=c_max;++c){
|
|
double r=fprs[ik](c);
|
|
for(;;){
|
|
if(ik+1>=k_max){
|
|
std::cerr<<"k_max hit, raise it and rerun\n";
|
|
return EXIT_FAILURE;
|
|
}
|
|
double rn=fprs[ik+1](c);
|
|
if(rn>=r)break;
|
|
r=rn;
|
|
++ik;
|
|
}
|
|
std::cout<<c<<";"<<r<<";"<<(ik+1)<<"\n";
|
|
}
|
|
}
|