mirror of
https://github.com/boostorg/bloom.git
synced 2026-01-19 16:12:10 +00:00
* removed superfluous inline (Alexander Grund) * made hasher equivalence a precondition for &=/|= (Andrzej Krzemienski) * documented exception safety guarantees (Andrzej Krzemienski) * mentioned Bloom filters are called so after Burton H Bloom (Dmitry Arkhipov) * added warning about OOM for very small FPR (Ivan Matek) * stressed config chart x axis is capacity/num elements rather than plain capacity (Ivan Matek) * s/[SIMD] is available/is enabled at compile time (Ivan Matek) * shut down clang-tidy warnings (Ivan Matek) * used "set union" for more clarity (Andrzej Krzemienski) * stressed early on that boost::bloom::filter is _not_ a container (Claudio DeSouza) * added bulk operations to roadmap (Dmitry Arkhipov) * added try_insert to roadmap (Konstantin Savvidy) * added estimated_size to roadmap (Konstantin Savvidy) * added alternative filters to roadmap (Konstantin Savvidy) * used <cstdint> instead of <boost/cstdint.hpp> (Rubén Pérez) * mentioned endianness when serializing filters (Rubén Pérez) * corrected sloppiness about optimum k determination (Tomer Vromen) * added run-time specification of k to roadmap (Tomer Vromen) * added test/CMakeLists.txt (Rubén Pérez) * added CMake-based testing to GHA (Rubén Pérez) (#8) * added <boost/bloom.hpp> (Rubén Pérez) * added Codecov reporting (Rubén Pérez) (#9) * moved from boost::unordered::hash_is_avalanching to ContainerHash's boost::hash_is_avalanching (Ivan Matek/Peter Dimov) * added syntax highlighting to code snippets (Rubén Pérez) * avoided C-style casts in examples (Rubén Pérez) * added acknowledgements section (Peter Turcan) * added Getting Started section (Peter Turcan) * fixed example Jamfile and added example building to CI (Rubén Pérez) (#10) * added diagram about overlapping vs. 
non-overlapping subarrays (Rubén Pérez/Ivan Matek/Vinnie Falco) * made first code snippet self-contained (Rubén Pérez/Peter Turcan) * added more comments to genome.cpp (Rubén Pérez) * added support for arrays as blocks (Tomer Vromen) (#24) * removed emplace (Seth Heeren/Peter Dimov) (#25) * required the allocator to be of unsigned char (Seth Heeren/Peter Dimov) (#26) * added compile-time validation of Block types (Rubén Pérez) (#27) * added value type to displayed filter names in tables (Tomer Vromen) (#28) * used -march=native rather than -mavx2 (Ivan Matek) * adopted hash strategy with fastrange plus a separate MCG (Kostas Savvidis/Peter Dimov) (#30) * several maintenance commits
84 lines
2.1 KiB
C++
84 lines
2.1 KiB
C++
/* Serialization of boost::bloom::filter.
|
|
*
|
|
* Copyright 2025 Joaquin M Lopez Munoz.
|
|
* Distributed under the Boost Software License, Version 1.0.
|
|
* (See accompanying file LICENSE_1_0.txt or copy at
|
|
* http://www.boost.org/LICENSE_1_0.txt)
|
|
*
|
|
* See https://www.boost.org/libs/bloom for library home page.
|
|
*/
|
|
|
|
#include <boost/bloom/filter.hpp>
|
|
#include <boost/bloom/multiblock.hpp>
|
|
#include <boost/core/detail/splitmix64.hpp>
|
|
#include <boost/uuid/uuid.hpp>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
|
|
/* emits a deterministic pseudorandom sequence of UUIDs */
|
|
|
|
struct uuid_generator
|
|
{
|
|
boost::uuids::uuid operator()()
|
|
{
|
|
std::uint8_t data[16];
|
|
std::uint64_t x = rng();
|
|
std::memcpy(&data[0], &x, sizeof(x));
|
|
x = rng();
|
|
std::memcpy(&data[8], &x, sizeof(x));
|
|
|
|
return {data};
|
|
}
|
|
|
|
boost::detail::splitmix64 rng;
|
|
};
|
|
|
|
/* Bloom filter over UUIDs, configured with K = 1 and a
 * multiblock<std::uint64_t, 8> subfilter.
 */
using filter = boost::bloom::filter<
  boost::uuids::uuid,
  1,
  boost::bloom::multiblock<std::uint64_t, 8>
>;

/* number of UUIDs the example inserts into the filter */
static constexpr std::size_t num_elements = 10000;
|
|
|
|
/* creates a filter with num_elements UUIDs */
|
|
|
|
filter create_filter()
|
|
{
|
|
uuid_generator gen;
|
|
filter f(num_elements, 0.005);
|
|
for(std::size_t i = 0; i < num_elements; ++i) f.insert(gen());
|
|
return f;
|
|
}
|
|
|
|
void save_filter(const filter& f, const char* filename)
|
|
{
|
|
std::ofstream out(filename, std::ios::binary | std::ios::trunc);
|
|
std::size_t c=f.capacity();
|
|
out.write(reinterpret_cast<const char*>(&c), sizeof(c)); /* save capacity (bits) */
|
|
auto s = f.array();
|
|
out.write(reinterpret_cast<const char*>(s.data()), s.size()); /* save array */
|
|
}
|
|
|
|
/* Reads a filter from the file named filename. The expected layout is the
 * one produced by save_filter: a native std::size_t holding the capacity,
 * followed by the bytes of the filter's array. A filter is constructed
 * with the stored capacity and its array is then overwritten with the
 * stored bytes.
 *
 * Throws std::ios_base::failure if the file cannot be opened or is too
 * short to contain the capacity and the whole array.
 */
filter load_filter(const char* filename)
{
  std::ifstream in(filename, std::ios::binary);

  /* A failed open has already set failbit, so this throws immediately;
   * later short reads (truncated file) throw as well. Without this, the
   * capacity below could be used without ever having been read.
   */
  in.exceptions(std::ios::failbit | std::ios::badbit);

  std::size_t c = 0;
  in.read(reinterpret_cast<char*>(&c), sizeof(c)); /* load capacity (bits) */

  filter f(c);
  auto s = f.array();
  in.read(
    reinterpret_cast<char*>(s.data()),
    static_cast<std::streamsize>(s.size())); /* load array */

  return f;
}
|
|
|
|
int main()
|
|
{
|
|
static constexpr const char* filename = "filter.bin";
|
|
|
|
auto f1 = create_filter();
|
|
save_filter(f1, filename);
|
|
auto f2 = load_filter(filename);
|
|
|
|
if (f1 == f2) std::cout << "serialization correct\n";
|
|
else std::cout << "something went wrong\n";
|
|
}
|