review feedback (#32)
* removed superfluous inline (Alexander Grund) * made hasher equivalence a precondition for &=/|= (Andrzej Krzemienski) * documented exception safety guarantees (Andrzej Krzemienski) * mentioned Bloom filters are called so after Burton H Bloom (Dmitry Arkhipov) * added warning about OOM for very small FPR (Ivan Matek) * stressed config chart x axis is capacity/num elements rather than plain capacity (Ivan Matek) * s/[SIMD] is available/is enabled at compile time (Ivan Matek) * shut down clang-tidy warnings (Ivan Matek) * used "set union" for more clarity (Andrzej Krzemienski) * stressed early on that boost::bloom::filter is _not_ a container (Claudio DeSouza) * added bulk operations to roadmap (Dmitry Arkhipov) * added try_insert to roadmap (Konstantin Savvidy) * added estimated_size to roadmap (Konstantin Savvidy) * added alternative filters to roadmap (Konstantin Savvidy) * used <cstdint> instead of <boost/cstdint.hpp> (Rubén Pérez) * mentioned endianness when serializing filters (Rubén Pérez) * corrected sloppiness about optimum k determination (Tomer Vromen) * added run-time specification of k to roadmap (Tomer Vromen) * added test/CMakeLists.txt (Rubén Pérez) * added CMake-based testing to GHA (Rubén Pérez) (#8) * added <boost/bloom.hpp> (Rubén Pérez) * added Codecov reporting (Rubén Pérez) (#9) * moved from boost::unordered::hash_is_avalanching to ContainerHash's boost::hash_is_avalanching (Ivan Matek/Peter Dimov) * added syntax highlighting to code snippets (Rubén Pérez) * avoided C-style casts in examples (Rubén Pérez) * added acknowledgements section (Peter Turcan) * added Getting Started section (Peter Turcan) * fixed example Jamfile and added example building to CI (Rubén Pérez) (#10) * added diagram about overlapping vs. 
non-overlapping subarrays (Rubén Pérez/Ivan Matek/Vinnie Falco) * made first code snippet self-contained (Rubén Pérez/Peter Turcan) * added more comments to genome.cpp (Rubén Pérez) * added support for arrays as blocks (Tomer Vromen) (#24) * removed emplace (Seth Heeren/Peter Dimov) (#25) * required the allocator to be of unsigned char (Seth Heeren/Peter Dimov) (#26) * added compile-time validation of Block types (Rubén Pérez) (#27) * added value type to displayed filter names in tables (Tomer Vromen) (#28) * used -march=native rather than -mavx2 (Ivan Matek) * adopted hash strategy with fastrange plus a separate MCG (Kostas Savvidis/Peter Dimov) (#30) * several maintenance commits
23
.codecov.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
# Copyright 2019 - 2021 Alexander Grund
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt)
|
||||
#
|
||||
# Sample codecov configuration file. Edit as required
|
||||
|
||||
codecov:
|
||||
max_report_age: off
|
||||
require_ci_to_pass: yes
|
||||
notify:
|
||||
# Increase this if you have multiple coverage collection jobs
|
||||
after_n_builds: 1
|
||||
wait_for_ci: yes
|
||||
|
||||
# Change how pull request comments look
|
||||
comment:
|
||||
layout: "reach,diff,flags,files,footer"
|
||||
|
||||
# Ignore specific files or folders. Glob patterns are supported.
|
||||
# See https://docs.codecov.com/docs/ignoring-paths
|
||||
ignore:
|
||||
- extra/**/*
|
||||
# - test/**/*
|
||||
216
.github/workflows/ci.yml
vendored
@@ -11,6 +11,8 @@ on:
|
||||
|
||||
env:
|
||||
UBSAN_OPTIONS: print_stacktrace=1
|
||||
B2_CI_VERSION: 1
|
||||
LCOV_BRANCH_COVERAGE: 0
|
||||
|
||||
jobs:
|
||||
posix:
|
||||
@@ -246,19 +248,17 @@ jobs:
|
||||
export ADDRMD=${{matrix.address-model}}
|
||||
./b2 -j3 libs/$LIBRARY/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} variant=debug,release
|
||||
|
||||
- name: Compile examples
|
||||
run: |
|
||||
cd ../boost-root
|
||||
export ADDRMD=${{matrix.address-model}}
|
||||
./b2 -j3 libs/$LIBRARY/example toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} variant=debug,release
|
||||
|
||||
windows:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- toolset: msvc-14.0
|
||||
cxxstd: 14,latest
|
||||
addrmd: 32,64
|
||||
os: windows-2019
|
||||
- toolset: msvc-14.2
|
||||
cxxstd: "14,17,20,latest"
|
||||
addrmd: 32,64
|
||||
os: windows-2019
|
||||
- toolset: msvc-14.3
|
||||
cxxstd: "14,17,20,latest"
|
||||
addrmd: 32,64
|
||||
@@ -270,7 +270,7 @@ jobs:
|
||||
- toolset: gcc
|
||||
cxxstd: "11,14,17,2a"
|
||||
addrmd: 64
|
||||
os: windows-2019
|
||||
os: windows-2022
|
||||
|
||||
runs-on: ${{matrix.os}}
|
||||
|
||||
@@ -305,3 +305,201 @@ jobs:
|
||||
run: |
|
||||
cd ../boost-root
|
||||
b2 -j3 libs/%LIBRARY%/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release embed-manifest-via=linker
|
||||
|
||||
- name: Compile examples
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root
|
||||
b2 -j3 libs/%LIBRARY%/example toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release embed-manifest-via=linker
|
||||
|
||||
posix-cmake-test:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
- os: ubuntu-24.04
|
||||
- os: macos-13
|
||||
- os: macos-14
|
||||
- os: macos-15
|
||||
|
||||
runs-on: ${{matrix.os}}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install packages
|
||||
if: matrix.install
|
||||
run: sudo apt-get -y install ${{matrix.install}}
|
||||
|
||||
- name: Setup Boost
|
||||
run: |
|
||||
echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
|
||||
LIBRARY=${GITHUB_REPOSITORY#*/}
|
||||
echo LIBRARY: $LIBRARY
|
||||
echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
|
||||
echo GITHUB_BASE_REF: $GITHUB_BASE_REF
|
||||
echo GITHUB_REF: $GITHUB_REF
|
||||
REF=${GITHUB_BASE_REF:-$GITHUB_REF}
|
||||
REF=${REF#refs/heads/}
|
||||
echo REF: $REF
|
||||
BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
|
||||
echo BOOST_BRANCH: $BOOST_BRANCH
|
||||
cd ..
|
||||
git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
|
||||
cd boost-root
|
||||
mkdir -p libs/$LIBRARY
|
||||
cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
|
||||
git submodule update --init tools/boostdep
|
||||
python tools/boostdep/depinst/depinst.py -I benchmark -I example --git_args "--jobs 3" $LIBRARY
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
cd ../boost-root
|
||||
mkdir __build__ && cd __build__
|
||||
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON ..
|
||||
|
||||
- name: Build tests
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
cmake --build . --target tests
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
ctest --output-on-failure --no-tests=error
|
||||
|
||||
windows-cmake-test:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- os: windows-2022
|
||||
|
||||
runs-on: ${{matrix.os}}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Boost
|
||||
shell: cmd
|
||||
run: |
|
||||
echo GITHUB_REPOSITORY: %GITHUB_REPOSITORY%
|
||||
for /f %%i in ("%GITHUB_REPOSITORY%") do set LIBRARY=%%~nxi
|
||||
echo LIBRARY: %LIBRARY%
|
||||
echo LIBRARY=%LIBRARY%>>%GITHUB_ENV%
|
||||
echo GITHUB_BASE_REF: %GITHUB_BASE_REF%
|
||||
echo GITHUB_REF: %GITHUB_REF%
|
||||
if "%GITHUB_BASE_REF%" == "" set GITHUB_BASE_REF=%GITHUB_REF%
|
||||
set BOOST_BRANCH=develop
|
||||
for /f %%i in ("%GITHUB_BASE_REF%") do if "%%~nxi" == "master" set BOOST_BRANCH=master
|
||||
echo BOOST_BRANCH: %BOOST_BRANCH%
|
||||
cd ..
|
||||
git clone -b %BOOST_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root
|
||||
cd boost-root
|
||||
mkdir -p libs\%LIBRARY% # remove when/if the library makes it into Boost
|
||||
xcopy /s /e /q %GITHUB_WORKSPACE% libs\%LIBRARY%\
|
||||
git submodule update --init tools/boostdep
|
||||
python tools/boostdep/depinst/depinst.py -I benchmark -I example --git_args "--jobs 3" %LIBRARY%
|
||||
|
||||
- name: Configure
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root
|
||||
mkdir __build__ && cd __build__
|
||||
cmake -DBOOST_INCLUDE_LIBRARIES=%LIBRARY% -DBUILD_TESTING=ON ..
|
||||
|
||||
- name: Build tests (Debug)
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
cmake --build . --target tests --config Debug
|
||||
|
||||
- name: Run tests (Debug)
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
ctest --output-on-failure --no-tests=error -C Debug
|
||||
|
||||
- name: Build tests (Release)
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
cmake --build . --target tests --config Release
|
||||
|
||||
- name: Run tests (Release)
|
||||
shell: cmd
|
||||
run: |
|
||||
cd ../boost-root/__build__
|
||||
ctest --output-on-failure --no-tests=error -C Release
|
||||
|
||||
codecov:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- toolset: gcc-14
|
||||
cxxstd: "11"
|
||||
os: ubuntu-24.04
|
||||
install: g++-14-multilib
|
||||
address-model: 32,64
|
||||
|
||||
runs-on: ${{matrix.os}}
|
||||
container:
|
||||
image: ${{matrix.container}}
|
||||
volumes:
|
||||
- /node20217:/node20217:rw,rshared
|
||||
- ${{ startsWith(matrix.container, 'ubuntu:1') && '/node20217:/__e/node20:ro,rshared' || ' ' }}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
steps:
|
||||
- name: Setup container environment
|
||||
if: matrix.container
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get -y install sudo python3 git g++ curl xz-utils
|
||||
if [[ "${{matrix.container}}" == "ubuntu:1"* ]]; then
|
||||
# Node 20 doesn't work with Ubuntu 16/18 glibc: https://github.com/actions/checkout/issues/1590
|
||||
curl -sL https://archives.boost.io/misc/node/node-v20.9.0-linux-x64-glibc-217.tar.xz | tar -xJ --strip-components 1 -C /node20217
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch Boost.CI
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: boostorg/boost-ci
|
||||
ref: master
|
||||
path: boost-ci-cloned
|
||||
|
||||
- name: Install Boost.CI
|
||||
run: |
|
||||
cp -r boost-ci-cloned/ci .
|
||||
rm -rf boost-ci-cloned
|
||||
|
||||
- name: Install packages
|
||||
if: matrix.install
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install ${{matrix.install}}
|
||||
|
||||
- name: Setup Boost
|
||||
env:
|
||||
B2_TOOLSET: ${{matrix.toolset}}
|
||||
B2_CXXSTD: ${{matrix.cxxstd}}
|
||||
B2_ADDRESS_MODEL: ${{matrix.address-model}}
|
||||
run: source ci/github/install.sh
|
||||
|
||||
- name: Setup coverage collection
|
||||
run: ci/github/codecov.sh "setup"
|
||||
|
||||
- name: Run tests
|
||||
run: ci/build.sh
|
||||
|
||||
- name: Upload coverage
|
||||
env:
|
||||
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
|
||||
run: ci/codecov.sh "upload"
|
||||
@@ -20,7 +20,6 @@ target_link_libraries(boost_bloom
|
||||
Boost::core
|
||||
Boost::throw_exception
|
||||
Boost::type_traits
|
||||
Boost::unordered
|
||||
)
|
||||
|
||||
target_compile_features(boost_bloom INTERFACE cxx_std_11)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Candidate Boost Bloom Library
|
||||
|
||||
[](https://github.com/joaquintides/bloom/tree/master) [](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [](https://drone.cpp.al/joaquintides/bloom) [](https://master.bloom.cpp.al/) </br>
|
||||
[](https://github.com/joaquintides/bloom/tree/develop) [](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [](https://drone.cpp.al/joaquintides/bloom) [](https://develop.bloom.cpp.al/) </br>
|
||||
[](https://github.com/joaquintides/bloom/tree/master) [](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [](https://drone.cpp.al/joaquintides/bloom) [](https://app.codecov.io/gh/joaquintides/bloom/tree/master) [](https://master.bloom.cpp.al/) </br>
|
||||
[](https://github.com/joaquintides/bloom/tree/develop) [](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [](https://drone.cpp.al/joaquintides/bloom) [](https://app.codecov.io/gh/joaquintides/bloom/tree/develop) [](https://develop.bloom.cpp.al/) </br>
|
||||
[](https://www.boost.org/users/license.html) <img alt="C++11 required" src="https://img.shields.io/badge/standard-C%2b%2b11-blue.svg"> <img alt="Header-only library" src="https://img.shields.io/badge/build-header--only-blue.svg">
|
||||
|
||||
(Candidate) Boost.Bloom provides the class template `boost::bloom::filter` that
|
||||
|
||||
@@ -53,11 +53,7 @@ void resume_timing()
|
||||
measure_start+=std::chrono::high_resolution_clock::now()-measure_pause;
|
||||
}
|
||||
|
||||
#include <boost/bloom/block.hpp>
|
||||
#include <boost/bloom/fast_multiblock32.hpp>
|
||||
#include <boost/bloom/fast_multiblock64.hpp>
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/bloom.hpp>
|
||||
#include <boost/core/detail/splitmix64.hpp>
|
||||
#include <boost/mp11/algorithm.hpp>
|
||||
#include <boost/mp11/list.hpp>
|
||||
@@ -212,14 +208,14 @@ using namespace boost::bloom;
|
||||
template<std::size_t K1,std::size_t K2,std::size_t K3>
|
||||
using filters1=boost::mp11::mp_list<
|
||||
filter<int,K1>,
|
||||
filter<int,1,block<boost::uint64_t,K2>>,
|
||||
filter<int,1,block<boost::uint64_t,K3>,1>
|
||||
filter<int,1,block<std::uint64_t,K2>>,
|
||||
filter<int,1,block<std::uint64_t,K3>,1>
|
||||
>;
|
||||
|
||||
template<std::size_t K1,std::size_t K2,std::size_t K3>
|
||||
using filters2=boost::mp11::mp_list<
|
||||
filter<int,1,multiblock<boost::uint64_t,K1>>,
|
||||
filter<int,1,multiblock<boost::uint64_t,K2>,1>,
|
||||
filter<int,1,multiblock<std::uint64_t,K1>>,
|
||||
filter<int,1,multiblock<std::uint64_t,K2>,1>,
|
||||
filter<int,1,fast_multiblock32<K3>>
|
||||
>;
|
||||
|
||||
@@ -230,6 +226,13 @@ using filters3=boost::mp11::mp_list<
|
||||
filter<int,1,fast_multiblock64<K3>,1>
|
||||
>;
|
||||
|
||||
template<std::size_t K1,std::size_t K2,std::size_t K3>
|
||||
using filters4=boost::mp11::mp_list<
|
||||
filter<int,1,block<std::uint64_t[8],K1>>,
|
||||
filter<int,1,block<std::uint64_t[8],K2>,1>,
|
||||
filter<int,1,multiblock<std::uint64_t[8],K3>>
|
||||
>;
|
||||
|
||||
int main(int argc,char* argv[])
|
||||
{
|
||||
if(argc<2){
|
||||
@@ -275,9 +278,9 @@ int main(int argc,char* argv[])
|
||||
"<table>\n"
|
||||
" <tr>\n"
|
||||
" <th></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<K></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,block<uint64_t,K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,block<uint64_t,K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,K></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,block<uint64_t,K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,block<uint64_t,K>,1></code></th>\n"
|
||||
" </tr>\n"
|
||||
" <tr>\n"
|
||||
" <th>c</th>\n"<<
|
||||
@@ -294,9 +297,9 @@ int main(int argc,char* argv[])
|
||||
std::cout<<
|
||||
" <tr>\n"
|
||||
" <th></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,multiblock<uint64_t,K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,multiblock<uint64_t,K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,fast_multiblock32<K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,multiblock<uint64_t,K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,multiblock<uint64_t,K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,fast_multiblock32<K>></code></th>\n"
|
||||
" </tr>\n"
|
||||
" <tr>\n"
|
||||
" <th>c</th>\n"<<
|
||||
@@ -313,9 +316,9 @@ int main(int argc,char* argv[])
|
||||
std::cout<<
|
||||
" <tr>\n"
|
||||
" <th></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,fast_multiblock32<K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,fast_multiblock64<K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<1,fast_multiblock64<K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,fast_multiblock32<K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,fast_multiblock64<K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,fast_multiblock64<K>,1></code></th>\n"
|
||||
" </tr>\n"
|
||||
" <tr>\n"
|
||||
" <th>c</th>\n"<<
|
||||
@@ -329,5 +332,24 @@ int main(int argc,char* argv[])
|
||||
row<filters3<11, 11, 11>>(16);
|
||||
row<filters3<13, 13, 14>>(20);
|
||||
|
||||
std::cout<<
|
||||
" <tr>\n"
|
||||
" <th></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,block<uint64_t[8],K>></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,block<uint64_t[8],K>,1></code></th>\n"
|
||||
" <th colspan=\"5\"><code>filter<int,1,multiblock<uint64_t[8],K>></code></th>\n"
|
||||
" </tr>\n"
|
||||
" <tr>\n"
|
||||
" <th>c</th>\n"<<
|
||||
subheader<<
|
||||
subheader<<
|
||||
subheader<<
|
||||
" </tr>\n";
|
||||
|
||||
row<filters4< 5, 6, 7>>( 8);
|
||||
row<filters4< 7, 7, 10>>(12);
|
||||
row<filters4< 9, 10, 11>>(16);
|
||||
row<filters4<12, 12, 15>>(20);
|
||||
|
||||
std::cout<<"</table>\n";
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ int main()
|
||||
|
||||
std::cout
|
||||
<<filter_name<<"\n"
|
||||
<<"fpr;c;k\n";
|
||||
<<"c;fpr;k\n";
|
||||
|
||||
std::size_t ik=0; /* k-1 */
|
||||
for(std::size_t c=c_min;c<=c_max;++c){
|
||||
|
||||
@@ -26,6 +26,14 @@
|
||||
.bordered_table th, .bordered_table td {
|
||||
border: 1px solid lightgray;
|
||||
}
|
||||
|
||||
.formula-center {
|
||||
display: block;
|
||||
overflow-x: auto;
|
||||
overflow-y: hidden;
|
||||
max-width: 100%;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
++++
|
||||
|
||||
@@ -35,7 +43,9 @@ include::bloom/tutorial.adoc[]
|
||||
include::bloom/configuration.adoc[]
|
||||
include::bloom/benchmarks.adoc[]
|
||||
include::bloom/reference.adoc[]
|
||||
include::bloom/future_work.adoc[]
|
||||
include::bloom/fpr_estimation.adoc[]
|
||||
include::bloom/implementation_notes.adoc[]
|
||||
include::bloom/release_notes.adoc[]
|
||||
include::bloom/acknowledgements.adoc[]
|
||||
include::bloom/copyright.adoc[]
|
||||
|
||||
31
doc/bloom/acknowledgements.adoc
Normal file
@@ -0,0 +1,31 @@
|
||||
[#acknowledgements]
|
||||
= Acknowledgements
|
||||
|
||||
:idprefix: acknowledgements_
|
||||
|
||||
Peter Dimov and Christian Mazakas reviewed significant portions of the code
|
||||
and documentation during the development phase. Sam Darwin provided support
|
||||
for CI setup and documentation building.
|
||||
|
||||
The Boost acceptance review took place between the 13th and 22nd of May,
|
||||
2025. Big thanks to Arnaud Becheler for his expert managing. The
|
||||
following people participated in the review:
|
||||
Dmitry Arkhipov,
|
||||
David Bien,
|
||||
Claudio DeSouza,
|
||||
Peter Dimov,
|
||||
Vinnie Falco,
|
||||
Alexander Grund,
|
||||
Seth Heeren,
|
||||
Andrzej Krzemieński,
|
||||
Ivan Matek,
|
||||
Christian Mazakas,
|
||||
Rubén Pérez,
|
||||
Kostas Savvidis,
|
||||
Peter Turcan,
|
||||
Tomer Vromen. Many thanks to all of them for their very helpful feedback.
|
||||
|
||||
Boost.Bloom was designed and written in
|
||||
https://en.wikipedia.org/wiki/C%C3%A1ceres%2c_Spain[Cáceres^] and
|
||||
https://en.wikipedia.org/wiki/Oropesa,_Spain[Oropesa^],
|
||||
January-June 2025.
|
||||
@@ -16,7 +16,8 @@ The chart plots FPR vs. _c_ (capacity / number of elements inserted) for several
|
||||
as shown in the table below.
|
||||
|
||||
+++
|
||||
<table class="bordered_table" style="text-align: center;">
|
||||
<div style="overflow-x: auto;">
|
||||
<table class="bordered_table" style="text-align: center; font-size: 85%;">
|
||||
<tr>
|
||||
<th rowspan="2"></th>
|
||||
<th colspan="21"><i>c</i> = capacity / number of elements inserted</th>
|
||||
@@ -26,69 +27,81 @@ as shown in the table below.
|
||||
<th>14</th> <th>15</th> <th>16</th> <th>17</th> <th>18</th> <th>19</th> <th>20</th> <th>21</th> <th>22</th> <th>23</th> <th>24</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
|
||||
<td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint32_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint32_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint64_t,K>></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint64_t,K>></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint64_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint64_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
|
||||
<td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>9</td> <td>9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,multiblock<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
|
||||
<td>9</td> <td>9</td> <td>9</td> <td>10</td> <td>13</td> <td>13</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>16</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint32_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint64_t[8],K>></code></td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td>
|
||||
<td>8</td> <td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>11</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<T,1,multiblock<uint32_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>12</td> <td>12</td> <td>14</td> <td>14</td> <td>14</td> <td>14</td> <td>15</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint64_t,K>></code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,block<uint64_t[8],K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td>
|
||||
<td>8</td> <td>8</td> <td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>13</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<T,1,multiblock<uint64_t,K>></code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td>10</td> <td>10</td> <td>12</td> <td>13</td> <td>14</td> <td>15</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>17</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint64_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
|
||||
<td style="text-align: left;"><code>filter<T,1,multiblock<uint64_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
|
||||
<td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>15</td> <td>16</td> <td>16</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<K></code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
|
||||
<td style="text-align: left;"><code>filter<T,K></code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
|
||||
<td>10</td> <td>11</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>14</td> <td>16</td> <td>16</td> <td>16</td> <td>17</td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
+++
|
||||
|
||||
Let's see how this can be used by way of an example. Suppose we plan to insert 10M elements
|
||||
and want to keep the FPR at 10^−4^. The chart gives us five possibilities:
|
||||
and want to keep the FPR at 10^−4^. The chart gives us six different
|
||||
values of _c_ (the array capacity divided by the number of elements, in our case 10M):
|
||||
|
||||
* `filter<K>` -> _c_ ≅ 19 bits per element
|
||||
* `filter<1, multiblock<uint64_t, K>, 1>` -> _c_ ≅ 20 bits per element
|
||||
* `filter<1, multiblock<uint64_t, K>>` -> _c_ ≅ 21 bits per element
|
||||
* `filter<1, multiblock<uint32_t, K>, 1>` -> _c_ ≅ 21.5 bits per element
|
||||
* `filter<1, multiblock<uint32_t, K>>` -> _c_ ≅ 23 bits per element
|
||||
* `filter<T, K>` -> _c_ ≅ 19 bits per element
|
||||
* `filter<T, 1, multiblock<uint64_t, K>, 1>` -> _c_ ≅ 20 bits per element
|
||||
* `filter<T, 1, multiblock<uint64_t, K>>` -> _c_ ≅ 21 bits per element
|
||||
* `filter<T, 1, block<uint64_t[8], K>, 1>` -> _c_ ≅ 21 bits per element
|
||||
* `filter<T, 1, multiblock<uint32_t, K>, 1>` -> _c_ ≅ 21.5 bits per element
|
||||
* `filter<T, 1, block<uint64_t[8], K>>` -> _c_ ≅ 22 bits per element
|
||||
* `filter<T, 1, multiblock<uint32_t, K>>` -> _c_ ≅ 23 bits per element
|
||||
|
||||
These options have different tradeoffs in terms of space used and performance. If
|
||||
we choose `filter<1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
|
||||
`filter<1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
|
||||
we choose `filter<T, 1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
|
||||
`filter<T, 1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
|
||||
value of `K` in the table for _c_ = 21 or 22, and we get our final configuration:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source,subs="+macros,+quotes"]
|
||||
-----
|
||||
using my_filter=filter<std::string, 1, fast_multiblock32<**14**>, 1>;
|
||||
-----
|
||||
|
||||
The resulting filter can be constructed in any of the following ways:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
// 1) calculate the capacity from the value of c we got from the chart
|
||||
my_filter pass:[f((]std::size_t)(10'000'000 * 21.5));
|
||||
my_filter f((std::size_t)(10'000'000 * 21.5));
|
||||
|
||||
// 2) let the library calculate the capacity from n and target fpr
|
||||
// expect some deviation from the capacity in 1)
|
||||
|
||||
@@ -6,57 +6,62 @@
|
||||
For a classical Bloom filter, the theoretical false positive rate, under some simplifying assumptions,
|
||||
is given by
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}(n,m,k)=\left(1 - \left(1 - \displaystyle\frac{1}{m}\right)^{kn}\right)^k \approx \left(1 - e^{-kn/m}\right)^k]{small-end} for large {small}stem:[m]{small-end},
|
||||
|
||||
where {small}stem:[n]{small-end} is the number of elements inserted in the filter, {small}stem:[m]{small-end} its capacity in bits and {small}stem:[k]{small-end} the
|
||||
number of bits set per insertion (see a https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives[derivation^]
|
||||
of this formula). For a given inverse load factor {small}stem:[c=m/n]{small-end}, the optimum {small}stem:[k]{small-end} is
|
||||
the integer closest to:
|
||||
of this formula). For a fixed inverse load factor {small}stem:[c=m/n]{small-end},
|
||||
the expression reaches at
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[k_{\text{opt}}=c\cdot\ln2,]{small-end}
|
||||
[.formula-center]
|
||||
{small}stem:[k_{\text{opt}}=c\cdot\ln2]{small-end}
|
||||
|
||||
yielding a minimum attainable FPR of {small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
|
||||
its minimum value
|
||||
{small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
|
||||
The optimum {small}stem:[k]{small-end}, which must be an integer,
|
||||
is either
|
||||
{small}stem:[\lfloor k_{\text{opt}}\rfloor]{small-end} or
|
||||
{small}stem:[\lceil k_{\text{opt}}\rceil]{small-end}.
|
||||
|
||||
In the case of a filter of the form `boost::bloom::filter<T, K, block<Block, K'>>`, we can extend
|
||||
the approach from https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f376ff09a64b388bfcde2f5353e9ddb44033aac8[Putze et al.^]
|
||||
to derive the (approximate but very precise) formula:
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbk/m) \cdot \text{FPR}(i,b,k')\right)^{k},]{small-end}
|
||||
|
||||
where
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{Pois}(i,\lambda)=\displaystyle\frac{\lambda^i e^{-\lambda}}{i!}]{small-end}
|
||||
|
||||
is the probability mass function of a https://en.wikipedia.org/wiki/Poisson_distribution[Poisson distribution^]
|
||||
with mean {small}stem:[\lambda]{small-end}, and {small}stem:[b]{small-end} is the size of `Block` in bits. If we're using `multiblock<Block,K'>`, we have
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbkk'/m) \cdot \text{FPR}(i,b,1)^{k'}\right)^{k}.]{small-end}
|
||||
|
||||
As we have commented xref:primer_multiblock_filters[before], in general
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}_\text{block}(n,m,b,k,k') \geq \text{FPR}_\text{multiblock}(n,m,b,k,k') \geq \text{FPR}(n,m,kk'),]{small-end}
|
||||
|
||||
that is, block and multiblock filters have worse FPR than the classical filter for the same number of bits
|
||||
set per insertion, but they will be faster. We have the particular case
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,1)=\text{FPR}_{\text{multiblock}}(n,m,b,k,1)=\text{FPR}(n,m,k),]{small-end}
|
||||
|
||||
which follows simply from the observation that using `{block|multiblock}<Block, 1>` behaves exactly as
|
||||
a classical Bloom filter.
|
||||
|
||||
We don't know of any closed, simple formula for the FPR of block and multiblock filters when
|
||||
`Bucketsize` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
|
||||
`Stride` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
|
||||
that is, when subfilter subarrays overlap.
|
||||
We can use the following approximations ({small}stem:[s]{small-end} = `BucketSize` in bits):
|
||||
We can use the following approximations ({small}stem:[s]{small-end} = `Stride` in bits):
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2b-s)k}{m}\right) \cdot \text{FPR}(i,2b-s,k')\right)^{k},]{small-end} +
|
||||
{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2bk'-s)k}{m}\right) \cdot \text{FPR}\left(i,\frac{2bk'-s}{k'},1\right)^{k'}\right)^{k},]{small-end}
|
||||
|
||||
|
||||
71
doc/bloom/future_work.adoc
Normal file
@@ -0,0 +1,71 @@
|
||||
[#future_work]
|
||||
= Future Work
|
||||
|
||||
:idprefix: future_work_
|
||||
|
||||
A number of features requested by reviewers and users of Boost.Bloom are
|
||||
considered for inclusion into future versions of the library.
|
||||
|
||||
== Bulk operations
|
||||
|
||||
Each insertion/lookup operation for `boost::bloom::filter` likely involves one or more
|
||||
cache misses in the access to the internal bit array. Following a similar
|
||||
approach to that of
|
||||
https://bannalia.blogspot.com/2023/10/bulk-visitation-in-boostconcurrentflatm.html[bulk visitation^]
|
||||
in Boost.Unordered, we can pipeline several operations so that
|
||||
cache miss stalls are leveraged to do useful computation. The interface
|
||||
for this functionality could be as follows:
|
||||
|
||||
[source]
|
||||
-----
|
||||
f.insert(first1, last1);
|
||||
f.may_contain(first2, last2, [] (const value_type& x, bool res) {
|
||||
// x is (likely) in the filter if res == true
|
||||
});
|
||||
-----
|
||||
|
||||
== `try_insert`
|
||||
|
||||
To avoid inserting an already present element, we now have to do:
|
||||
|
||||
[source]
|
||||
-----
|
||||
if(!f.may_contain(x)) f.insert(x);
|
||||
-----
|
||||
|
||||
These two calls can be combined in a potentially faster,
|
||||
single operation:
|
||||
|
||||
[source]
|
||||
-----
|
||||
bool res = f.try_insert(x); // returns true if x was not present
|
||||
-----
|
||||
|
||||
== Estimation of number of elements inserted
|
||||
|
||||
For a classical Bloom filter, the number of elements actually inserted
|
||||
can be estimated from the number {small}stem:[B]{small-end} of bits set
|
||||
to one in the array as
|
||||
|
||||
[.formula-center]
|
||||
{small}stem:[n\approx-\displaystyle\frac{m}{k}\ln\left(1-\displaystyle\frac{B}{m}\right),]{small-end}
|
||||
|
||||
which can be used for the implementation of a member function
|
||||
`estimated_size`. As of this writing, we don't know how to extend the
|
||||
formula to the case of block and multiblock filters. Any help on this
|
||||
problem is much appreciated.
|
||||
|
||||
== Run-time specification of _k_
|
||||
|
||||
Currently, the number _k_ of bits set per operation is configured at compile time.
|
||||
A variation of (or extension to) `boost::bloom::filter` can be provided
|
||||
where the value of _k_ is specified at run-time, the tradeoff being that
|
||||
its performance will be worse than the static case (preliminary experiments
|
||||
show an increase in execution time of around 10-20%).
|
||||
|
||||
== Alternative filters
|
||||
|
||||
We can consider adding additional data structures such as
|
||||
https://en.wikipedia.org/wiki/Cuckoo_filter[cuckoo^] and
|
||||
https://arxiv.org/pdf/1912.08258[xor^] filters, which are more
|
||||
space efficient and potentially faster.
|
||||
@@ -8,7 +8,7 @@
|
||||
This is the bit-mixing post-process we use to improve the statistical properties
|
||||
of the hash function when it doesn't have the avalanching property:
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[m\leftarrow\text{mul}(h,C)]{small-end}, +
|
||||
{small}stem:[h'\leftarrow\text{high}(m)\text{ xor }\text{low}(m)]{small-end},
|
||||
|
||||
@@ -37,7 +37,7 @@ show how to relax this requirement down to two different hash functions
|
||||
{small}stem:[h_1(x)]{small-end} and {small}stem:[h_2(x)]{small-end} linearly
|
||||
combined as
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[g_i(x)=h_1(x)+ih_2(x).]{small-end}
|
||||
|
||||
Without formal justification, we have relaxed this even further to just one
|
||||
@@ -47,32 +47,35 @@ by means of very cheap mixing schemes. In what follows
|
||||
{small}stem:[k]{small-end}, {small}stem:[k']{small-end} are the homonymous values
|
||||
in a filter of the form `boost::bloom::filter<T, K, {block|multiblock}<Block, K'>>`,
|
||||
{small}stem:[b]{small-end} is `sizeof(Block) * CHAR_BIT`,
|
||||
and {small}stem:[r]{small-end} is the number of buckets in the filter.
|
||||
and {small}stem:[r]{small-end} is the number of subarrays in the filter.
|
||||
|
||||
=== Bucket Location
|
||||
=== Subarray Location
|
||||
|
||||
To produce a location (i.e. a number {small}stem:[p]{small-end} in {small}stem:[[0,r)]{small-end}) from
|
||||
{small}stem:[h_{i-1}]{small-end}, instead of the straightforward but costly
|
||||
procedure {small}stem:[p\leftarrow h_{i-1}\bmod r]{small-end} we resort to
|
||||
Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]. Moreover,
|
||||
we combine this calculation with the production of {small}stem:[h_{i}]{small-end}
|
||||
from {small}stem:[h_{i-1}]{small-end} as follows:
|
||||
Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]:
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[m\leftarrow\text{mul}(h_{i-1},r),]{small-end} +
|
||||
{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m),]{small-end} +
|
||||
{small}stem:[h_i\leftarrow m \bmod 2^{64}=\text{low}(m).]{small-end}
|
||||
{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m).]{small-end}
|
||||
|
||||
To decorrelate {small}stem:[p]{small-end} from further uses of the hash value,
|
||||
we produce {small}stem:[h_{i}]{small-end} from {small}stem:[h_{i-1}]{small-end} as
|
||||
|
||||
[.formula-center]
|
||||
{small}stem:[h_i\leftarrow c \cdot h_{i-1} \bmod 2^{64}=\text{low}(c \cdot h_{i-1}),]{small-end}
|
||||
|
||||
with {small}stem:[c=\text{0xf1357aea2e62a9c5}]{small-end} (64-bit mode),
|
||||
{small}stem:[c=\text{0xe817fb2d}]{small-end} (32-bit mode) obtained
|
||||
from https://arxiv.org/pdf/2001.05304[Steele and Vigna^].
|
||||
The transformation {small}stem:[h_{i-1} \rightarrow h_i]{small-end} is
|
||||
a simple https://en.wikipedia.org/wiki/Linear_congruential_generator[multiplicative congruential generator^]
|
||||
over {small}stem:[2^{64}]{small-end}. For this MCG to produce long
|
||||
cycles, {small}stem:[h_0]{small-end} must be odd and the multiplicative constant
|
||||
{small}stem:[r]{small-end} must be {small}stem:[\equiv \pm 3 \text{ (mod 8)}]{small-end}:
|
||||
to meet these requirements, the implementation adjusts {small}stem:[h_0]{small-end}
|
||||
to {small}stem:[h_0']{small-end} and {small}stem:[r]{small-end}
|
||||
to {small}stem:[r']{small-end}. This renders the least significant bit
|
||||
of {small}stem:[h_i]{small-end} unsuitable for pseudorandomization
|
||||
(it is always one).
|
||||
cycles, {small}stem:[h_0]{small-end} must be odd, so the implementation adjusts
|
||||
{small}stem:[h_0]{small-end} to {small}stem:[h_0'= (h_0\text{ or }1)]{small-end},
|
||||
which renders the least significant bit of {small}stem:[h_i]{small-end}
|
||||
unsuitable for pseudorandomization (it is always one).
|
||||
|
||||
=== Bit selection
|
||||
|
||||
@@ -109,14 +112,14 @@ In the case of SSE2, we don't have the 128-bit equivalent of
|
||||
`+++_+++mm256_sllv_epi32`, so we use the following, mildly interesting
|
||||
technique: a `+++__+++m128i` of the form
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[((x_0+127)\cdot 2^{23},(x_1+127)\cdot 2^{23},(x_2+127)\cdot 2^{23},(x_3+127)\cdot 2^{23}),]{small-end}
|
||||
|
||||
where each {small}stem:[x_i]{small-end} is in {small}stem:[[0,32)]{small-end},
|
||||
can be `reinterpret_cast`+++ed+++ to (i.e., has the same binary representation as)
|
||||
the `+++__+++m128` (register of `float`+++s+++)
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
{small}stem:[(2^{x_0},2^{x_1},2^{x_2},2^{x_3}),]{small-end}
|
||||
|
||||
from which our desired `+++__+++m128i` of shifted 1s can be obtained
|
||||
|
||||
@@ -8,10 +8,11 @@ that can be configured to implement a classical Bloom filter as well as
|
||||
variations discussed in the literature such as block filters, multiblock filters,
|
||||
and more.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source,subs="+macros,+quotes"]
|
||||
-----
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom.hpp>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
int main()
|
||||
@@ -25,7 +26,6 @@ int main()
|
||||
// insert elements (they can't be erased, Bloom filters are insert-only)
|
||||
f.insert("hello");
|
||||
f.insert("Boost");
|
||||
//...
|
||||
|
||||
// elements inserted are always correctly checked as such
|
||||
assert(f.may_contain("hello") == true);
|
||||
@@ -35,7 +35,10 @@ int main()
|
||||
// the number of bits set per element and generally how the boost::bloom::filter
|
||||
// was specified
|
||||
if(f.may_contain("bye")) { // likely false
|
||||
//...
|
||||
std::cout << "false positive\n";
|
||||
}
|
||||
else {
|
||||
std::cout << "everything worked as expected\n";
|
||||
}
|
||||
}
|
||||
-----
|
||||
@@ -46,4 +49,20 @@ Boost.Bloom has been implemented with a focus on performance;
|
||||
SIMD technologies such as AVX2, Neon and SSE2 can be leveraged to speed up
|
||||
operations.
|
||||
|
||||
Boost.Bloom is a header-only library. C++11 or later required.
|
||||
== Getting Started
|
||||
|
||||
Consult the website
|
||||
https://www.boost.org/doc/user-guide/getting-started.html[section^]
|
||||
on how to install the entire Boost project or only Boost.Bloom
|
||||
and its dependencies.
|
||||
|
||||
Boost.Bloom is a header-only library, so no additional build phase is
|
||||
needed. C++11 or later is required. The library has been verified to
|
||||
work with GCC 4.8, Clang 3.9 and Visual Studio 2015 (and later versions
|
||||
of those). You can check that your environment is correctly set up
|
||||
by compiling the
|
||||
link:../../example/basic.cpp[example program] shown above.
|
||||
|
||||
If you are not familiar with Bloom filters in general, see the
|
||||
xref:primer[primer]; otherwise, you can jump directly to the
|
||||
xref:tutorial[tutorial].
|
||||
@@ -3,12 +3,20 @@
|
||||
|
||||
:idprefix: primer_
|
||||
|
||||
A Bloom filter is a probabilistic data structure where inserted elements can be looked up
|
||||
with 100% accuracy, whereas looking up for a non-inserted element may fail with
|
||||
some probability called the filter's _false positive rate_ or FPR. The tradeoff here is
|
||||
that Bloom filters occupy much less space than traditional non-probabilistic containers
|
||||
(typically, around 8-20 bits per element) for an acceptably low FPR. The greater
|
||||
the filter's _capacity_ (its size in bits), the lower the resulting FPR.
|
||||
A Bloom filter (named after its inventor Burton Howard Bloom) is a probabilistic data
|
||||
structure where inserted elements can be looked up with 100% accuracy, whereas looking
|
||||
up a non-inserted element may fail with some probability called the filter's
|
||||
_false positive rate_ or FPR. The tradeoff here is that Bloom filters occupy much less
|
||||
space than traditional non-probabilistic containers (typically, around 8-20 bits per
|
||||
element) for an acceptably low FPR. The greater the filter's _capacity_ (its size in bits),
|
||||
the lower the resulting FPR.
|
||||
|
||||
In general, Bloom filters are useful to prevent/mitigate queries against large data sets
|
||||
when exact retrieval is costly and/or can't be made in main memory.
|
||||
|
||||
[.boxed]
|
||||
====
|
||||
*Example: Speeding up unsuccessful requests to a database*
|
||||
|
||||
One prime application of Bloom filters and similar data structures is for the prevention
|
||||
of expensive disk/network accesses when these would fail to retrieve a given piece of
|
||||
@@ -18,7 +26,7 @@ For instance, suppose we are developing a frontend for a database with access ti
|
||||
Inserting a Bloom filter with a lookup time of 200 ns and a FPR of 0.5% will reduce the
|
||||
average response time of the system from 10 ms to
|
||||
|
||||
[.text-center]
|
||||
[.formula-center]
|
||||
(10 + 0.0002) × 50.25% + 0.0002 × 49.75% ≅ 5.03 ms,
|
||||
|
||||
that is, we get a ×1.99 overall speedup. If the database holds 1 billion records,
|
||||
@@ -27,8 +35,8 @@ which is perfectly realizable.
|
||||
|
||||
image::db_speedup.png[align=center, title="Improving DB negative access time with a Bloom filter."]
|
||||
|
||||
In general, Bloom filters are useful to prevent/mitigate queries against large data sets
|
||||
when exact retrieval is costly and/or can't be made in main memory.
|
||||
====
|
||||
|
||||
Applications have been described in the areas of web caching,
|
||||
dictionary compression, network routing and genomics, among others.
|
||||
https://www.eecs.harvard.edu/~michaelm/postscripts/im2005b.pdf[Broder and Mitzenmacher^]
|
||||
@@ -36,17 +44,17 @@ provide a rather extensive review of use cases with a focus on networking.
|
||||
|
||||
== Implementation
|
||||
|
||||
The implementation of a Bloom filter consists of an array of _m_ bits, initially set to zero.
|
||||
The implementation of a classical Bloom filter consists of an array of _m_ bits, initially set to zero.
|
||||
Inserting an element _x_ reduces to selecting _k_ positions pseudorandomly (with the help
|
||||
of _k_ independent hash functions) and setting them to one.
|
||||
|
||||
image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter, _k_ = 6."]
|
||||
image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter with _k_ = 6 different hash functions. Inserting _x_ reduces to setting to one the bits at positions 10, 14, 43, 58, 1, and 39 as indicated by _h_~1~(_x_), ... , _h_~6~(_x_)."]
|
||||
|
||||
To check if an element _y_ is in the filter, we follow the same procedure and see if
|
||||
the selected bits are all set to one. In the example figure there are two unset bits, which
|
||||
definitely indicates _y_ was not inserted in the filter.
|
||||
|
||||
image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter."]
|
||||
image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter. Value _y_ is not in the filter because bits at positions 20 and 61 are not set to one."]
|
||||
|
||||
A false positive occurs when the bits checked happen to be all set to one due to
|
||||
other, unrelated insertions. The probability of having a false positive increases as we
|
||||
@@ -57,19 +65,19 @@ when the array is sparsely populated, a higher value of _k_ improves (decreases)
|
||||
as there are more chances that we hit a non-set bit; however, if _k_ is very high
|
||||
the array will have more and more bits set to one as new elements are inserted, which
|
||||
eventually will reach a point where we lose out to a filter with a lower _k_ and
|
||||
thus a smaller proportions of set bits.
|
||||
thus a smaller proportion of set bits. For given values of _n_ and _m_, the optimum _k_ is
|
||||
{small}stem:[\lfloor k_{\text{opt}}\rfloor]{small-end} or
|
||||
{small}stem:[\lceil k_{\text{opt}}\rceil]{small-end}, with
|
||||
|
||||
image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits."]
|
||||
[.formula-center]
|
||||
{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n},]{small-end}
|
||||
|
||||
For given values of _n_ and _m_, the optimum _k_ is the integer closest to
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n}]{small-end}
|
||||
|
||||
for a minimum FPR of
|
||||
for a minimum FPR close to
|
||||
{small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{m/n}]{small-end}. See the appendix
|
||||
on xref:fpr_estimation[FPR estimation] for more details.
|
||||
|
||||
image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits. _k_ = 6 (red) has a better (lower) FPR than _k_ = 2 (blue) for small values of _n_, but eventually degrades more as _n_ grows. The dotted line shows the minimum attainable FPR resulting from selecting the optimum value of _k_ for each value of _n_."]
|
||||
|
||||
== Variations on the Classical Filter
|
||||
|
||||
=== Block Filters
|
||||
@@ -82,21 +90,21 @@ setting/checking in a small block of _b_ bits pseudorandomly selected from the
|
||||
entire array. If the block is small enough, it will fit in a CPU cacheline,
|
||||
thus drastically reducing the number of cache misses.
|
||||
|
||||
image::block_insertion.png[align=center, title="Block filter."]
|
||||
image::block_insertion.png[align=center, title="Block filter. A block of _b_ bits is selected based on _h_~0~(_x_), and all subsequent bit setting is constrained there."]
|
||||
|
||||
The downside is that the resulting FPR is worse than that of a classical filter for
|
||||
the same values of _n_, _m_ and _k_. Intuitively, block filters reduce the
|
||||
uniformity of the distribution of bits in the array, which ultimately hurts their
|
||||
probabilistic performance.
|
||||
|
||||
image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter, _m_ = 10^5^ bits."]
|
||||
image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter with the same _k_ = 4, _m_ = 10^5^ bits."]
|
||||
|
||||
A further variation on this idea is to have operations select _k_ blocks
|
||||
with _k'_ bits set on each. This, again, will have a worse FPR than a classical
|
||||
filter with _k·k'_ bits per operation, but improves on a plain
|
||||
_k·k'_ block filter.
|
||||
|
||||
image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion."]
|
||||
image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion. _k_ = 2 blocks are selected, and _k_' = 3 bits are set in each."]
|
||||
|
||||
=== Multiblock Filters
|
||||
|
||||
@@ -106,7 +114,7 @@ so that each block takes exactly one bit. This still maintains a good cache
|
||||
locality but improves FPR with respect to block filters because bits set to one
|
||||
are more spread out across the array.
|
||||
|
||||
image::multiblock_insertion.png[align=center, title="Multiblock filter."]
|
||||
image::multiblock_insertion.png[align=center, title="Multiblock filter. A range of _k_' = 4 consecutive blocks is selected based on _h_~0~(_x_), and a bit is set to one in each of the blocks."]
|
||||
|
||||
Multiblock filters can also be combined with multi-insertion. In general,
|
||||
for the same number of bits per operation and equal values of _n_ and _m_,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
[#reference]
|
||||
= Reference
|
||||
|
||||
include::reference/header_bloom.adoc[]
|
||||
include::reference/header_filter.adoc[]
|
||||
include::reference/filter.adoc[]
|
||||
include::reference/subfilters.adoc[]
|
||||
|
||||
@@ -34,9 +34,11 @@ struct block
|
||||
|===
|
||||
|
||||
|`Block`
|
||||
|An unsigned integral type.
|
||||
|An unsigned integral type or an array of 2^`N`^ elements of unsigned integral type.
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
|
||||
|===
|
||||
|
||||
'''
|
||||
@@ -50,3 +50,5 @@ The non-SIMD case falls back to regular `multiblock`.
|
||||
|
||||
`xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock32<K>>` is
|
||||
`4 * K`.
|
||||
|
||||
'''
|
||||
@@ -34,8 +34,9 @@ namespace bloom{
|
||||
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
|
||||
typename Hash = boost::hash<T>,
|
||||
typename Allocator = std::allocator<unsigned char>
|
||||
>
|
||||
class filter
|
||||
{
|
||||
@@ -44,7 +45,7 @@ public:
|
||||
using value_type = T;
|
||||
static constexpr std::size_t k = K;
|
||||
using subfilter = Subfilter;
|
||||
static constexpr std::size_t xref:filter_bucket_size[bucket_size] = xref:filter_bucket_size[__see below__];
|
||||
static constexpr std::size_t xref:filter_stride[stride] = xref:filter_stride[__see below__];
|
||||
using hasher = Hash;
|
||||
using allocator_type = Allocator;
|
||||
using size_type = std::size_t;
|
||||
@@ -120,8 +121,6 @@ public:
|
||||
boost::span<const unsigned char> xref:#filter_array[array]() const noexcept;
|
||||
|
||||
// modifiers
|
||||
template<typename... Args>
|
||||
void xref:#filter_emplace[emplace](Args&&... args);
|
||||
void xref:#filter_insert[insert](const value_type& x);
|
||||
template<typename U>
|
||||
void xref:#filter_insert[insert](const U& x);
|
||||
@@ -172,39 +171,52 @@ bit setting/checking into the filter's internal array. The subfilter is invoked
|
||||
per operation on `K` pseudorandomly selected portions of the array (_subarrays_) of width
|
||||
`xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`.
|
||||
|
||||
|`BucketSize`
|
||||
|`Stride`
|
||||
| Distance in bytes between the initial positions of consecutive subarrays.
|
||||
If `BucketSize` is specified as zero, the actual distance is automatically selected to
|
||||
If `Stride` is specified as zero, the actual distance is automatically selected to
|
||||
`_used-value-size_<Subfilter>` (non-overlapping subarrays).
|
||||
Otherwise, `BucketSize` must be not greater than `_used-value-size_<Subfilter>`.
|
||||
Otherwise, `Stride` must not be greater than `_used-value-size_<Subfilter>`.
|
||||
|
||||
|`Hash`
|
||||
|A https://en.cppreference.com/w/cpp/named_req/Hash[Hash^] type over `T`.
|
||||
|
||||
|`Allocator`
|
||||
|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is `T`.
|
||||
|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is
|
||||
`unsigned char`.
|
||||
|
||||
|===
|
||||
|
||||
Allocation and deallocation of the internal array is done through an internal copy of the
|
||||
provided allocator. `value_type` construction/destruction (which only happens in
|
||||
`xref:filter_emplace[emplace]`) uses
|
||||
`std::allocator_traits<Allocator>::construct`/`destroy`.
|
||||
provided allocator. If `xref:filter_stride[stride]` is a
|
||||
multiple of _a_ = `alignof(Subfilter::value_type)`, the array is byte-aligned to
|
||||
max(64, _a_).
|
||||
|
||||
If `link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]<Hash>::value`
|
||||
If `link:../../../container_hash/doc/html/hash.html#ref_hash_is_avalanchinghash[boost::hash_is_avalanching]<Hash>::value`
|
||||
is `true` and `sizeof(std::size_t) >= 8`,
|
||||
the hash function is used as-is; otherwise, a bit-mixing post-processing stage
|
||||
is added to increase the quality of hashing at the expense of extra computational cost.
|
||||
|
||||
*Exception Safety Guarantees*
|
||||
|
||||
Except when explicitly noted, all non-const member functions and associated functions taking
|
||||
`boost::bloom::filter` by non-const reference provide the
|
||||
https://en.cppreference.com/w/cpp/language/exceptions#Exception_safety[basic exception guarantee^],
|
||||
whereas all const member functions and associated functions taking
|
||||
`boost::bloom::filter` by const reference provide the
|
||||
https://en.cppreference.com/w/cpp/language/exceptions#Exception_safety[strong exception guarantee^].
|
||||
|
||||
Except when explicitly noted, no operation throws an exception unless that exception
|
||||
is thrown by the filter's `Hash` or `Allocator` object (if any).
|
||||
|
||||
=== Types and Constants
|
||||
|
||||
[[filter_bucket_size]]
|
||||
[[filter_stride]]
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
static constexpr std::size_t bucket_size;
|
||||
static constexpr std::size_t stride;
|
||||
----
|
||||
|
||||
Equal to `BucketSize` if that parameter was specified as distinct from zero.
|
||||
Equal to `Stride` if that parameter was specified as distinct from zero.
|
||||
Otherwise, equal to `xref:subfilters_used_value_size[_used-value-size_]<subfilter>`.
|
||||
|
||||
=== Constructors
|
||||
@@ -236,6 +248,7 @@ filter(
|
||||
Constructs an empty filter using copies of `h` and `al` as the hash function and allocator, respectively.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `fpr` is between 0.0 and 1.0.
|
||||
Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
|
||||
`capacity() == capacity_for(n, fpr)` (second overload).
|
||||
|
||||
@@ -259,7 +272,8 @@ and inserts the values from `[first, last)` into it.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
|
||||
`[first, last)` is a valid range.
|
||||
`[first, last)` is a valid range. +
|
||||
`fpr` is between 0.0 and 1.0.
|
||||
Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
|
||||
`capacity() == capacity_for(n, fpr)` (second overload). +
|
||||
`may_contain(x)` for all values `x` from `[first, last)`.
|
||||
@@ -366,7 +380,6 @@ filter(
|
||||
Equivalent to `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), m, h, al)` (first overload)
|
||||
or `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), n, fpr, h, al)` (second overload).
|
||||
|
||||
|
||||
==== Capacity Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
@@ -378,7 +391,6 @@ filter(size_type n, double fpr, const allocator_type& al);
|
||||
Equivalent to `xref:#filter_capacity_constructor[filter](m, hasher(), al)` (first overload)
|
||||
or `xref:#filter_capacity_constructor[filter](n, fpr, hasher(), al)` (second overload).
|
||||
|
||||
|
||||
==== Initializer List Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
@@ -425,6 +437,7 @@ Preconditions:;; If `pocca`,
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
Postconditions:;; `*this == x`.
|
||||
Returns:;; `*this`.
|
||||
Exception Safety:;; Strong.
|
||||
|
||||
==== Move Assignment
|
||||
|
||||
@@ -449,6 +462,7 @@ Preconditions:;; If `pocma`,
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
Postconditions:;; `x.capacity() == 0`.
|
||||
Returns:;; `*this`.
|
||||
Exception Safety:;; Nothrow as indicated, otherwise strong.
|
||||
|
||||
==== Initializer List Assignment
|
||||
|
||||
@@ -520,20 +534,6 @@ Returns:;; A span over the internal array.
|
||||
|
||||
=== Modifiers
|
||||
|
||||
==== Emplace
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<typename... Args> void emplace(Args&&... args);
|
||||
----
|
||||
|
||||
Inserts an element constructed from `std::forward<Args>(args)+++...+++`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^]
|
||||
into `filter` from `std::forward<Args>(args)+++...+++`. +
|
||||
`value_type` is https://en.cppreference.com/w/cpp/named_req/Erasable[Erasable^] from `filter`.
|
||||
|
||||
==== Insert
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
@@ -548,6 +548,7 @@ bits of the internal array deterministically selected from the value
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `may_contain(x)`.
|
||||
Exception Safety:;; Strong.
|
||||
Notes:;; The second overload only participates in overload resolution if
|
||||
`hasher::is_transparent` is a valid member typedef.
|
||||
|
||||
@@ -591,7 +592,7 @@ If `pocs`, swaps the internal allocator with that of `x`.
|
||||
Preconditions:;; `pocs || get_allocator() == x.get_allocator()`. +
|
||||
If `pocs`, `Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^]. +
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
|
||||
Exception Safety:;; Nothrow.
|
||||
|
||||
==== Clear
|
||||
|
||||
@@ -615,8 +616,10 @@ equal to `capacity()`, and clears the filter. +
|
||||
Second overload: Equivalent to `reset(capacity_for(n, fpr))`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `fpr` is between 0.0 and 1.0.
|
||||
Postconditions:;; In general, `capacity() >= m`. +
|
||||
If `m == 0` or `m == capacity()` or `m == capacity_for(n, fpr)` for some `n` and `fpr`, then `capacity() == m`.
|
||||
Exception Safety:;; If `m == 0` or `capacity_for(n, fpr) == 0`, nothrow, otherwise strong.
|
||||
|
||||
==== Combine with AND
|
||||
|
||||
@@ -630,7 +633,9 @@ otherwise, changes the value of each bit in the internal array with the result o
|
||||
doing a logical AND operation of that bit and the corresponding one in `x`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; The `Hash` objects of `*this` and `x` are equivalent.
|
||||
Returns:;; `*this`.
|
||||
Exception Safety:;; Strong.
|
||||
|
||||
==== Combine with OR
|
||||
|
||||
@@ -644,7 +649,9 @@ otherwise, changes the value of each bit in the internal array with the result o
|
||||
doing a logical OR operation of that bit and the corresponding one in `x`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; The `Hash` objects of `*this` and `x` are equivalent.
|
||||
Returns:;; `*this`.
|
||||
Exception Safety:;; Strong.
|
||||
|
||||
=== Observers
|
||||
|
||||
@@ -698,6 +705,7 @@ bool operator==(
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; The `Hash` objects of `x` and `y` are equivalent.
|
||||
Returns:;; `true` iff `x.capacity() == y.capacity()` and
|
||||
`x`++'++s and `y`++'++s internal arrays are bitwise identical.
|
||||
|
||||
@@ -713,6 +721,7 @@ bool operator!=(
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; The `Hash` objects of `x` and `y` are equivalent.
|
||||
Returns:;; `!(x xref:filter_operator[==] y)`.
|
||||
|
||||
|
||||
@@ -728,3 +737,5 @@ void swap(filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
|
||||
----
|
||||
|
||||
Equivalent to `x.xref:filter_swap[swap](y)`.
|
||||
|
||||
'''
|
||||
9
doc/bloom/reference/header_bloom.adoc
Normal file
@@ -0,0 +1,9 @@
|
||||
[#header_bloom]
|
||||
== `<boost/bloom.hpp>`
|
||||
|
||||
:idprefix: header_bloom_
|
||||
|
||||
Convenience header including all the other headers listed in this
|
||||
reference.
|
||||
|
||||
'''
|
||||
@@ -13,27 +13,28 @@ namespace bloom{
|
||||
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
|
||||
typename Hash = boost::hash<T>,
|
||||
typename Allocator = std::allocator<unsigned char>
|
||||
>
|
||||
class xref:filter[filter];
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
|
||||
>
|
||||
bool xref:filter_operator[operator+++==+++](
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
const filter<T, K, SF, S, H, A>& x, const filter<T, K, SF, S, H, A>& y);
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
|
||||
>
|
||||
bool xref:filter_operator_2[operator!=](
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
const filter<T, K, SF, S, H, A>& x, const filter<T, K, SF, S, H, A>& y);
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
|
||||
>
|
||||
void xref:filter_swap_2[swap](filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
|
||||
void xref:filter_swap_2[swap](filter<T, K, SF, S, H, A>& x, filter<T, K, SF, S, H, A>& y)
|
||||
noexcept(noexcept(x.swap(y)));
|
||||
|
||||
} // namespace bloom
|
||||
|
||||
@@ -34,7 +34,7 @@ struct multiblock
|
||||
|===
|
||||
|
||||
|`Block`
|
||||
|An unsigned integral type.
|
||||
|An unsigned integral type or an array of 2^`N`^ elements of unsigned integral type.
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
@@ -43,3 +43,5 @@ struct multiblock
|
||||
|
||||
Each of the `K` bits set/checked is located in a different element of the
|
||||
`Block[K]` array.
|
||||
|
||||
'''
|
||||
@@ -55,3 +55,5 @@ constexpr std::size_t _used-value-size_; // exposition only
|
||||
constant exists, or `sizeof(Subfilter::value_type)` otherwise.
|
||||
The value is the effective size in bytes of the subarrays upon which a
|
||||
given subfilter operates.
|
||||
|
||||
'''
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
:idprefix: release_notes_
|
||||
|
||||
== Boost 1.xx
|
||||
== Boost 1.89
|
||||
|
||||
* Initial release.
|
||||
|
||||
|
||||
@@ -3,81 +3,127 @@
|
||||
|
||||
:idprefix: tutorial_
|
||||
|
||||
== Filter Definition
|
||||
|
||||
A `boost::bloom::filter` can be regarded as a bit array divided into _buckets_ that
|
||||
A `boost::bloom::filter` can be regarded as a bit array divided into _subarrays_ that
|
||||
are selected pseudo-randomly (based on a hash function) upon insertion:
|
||||
each of the buckets is passed to a _subfilter_ that marks several of its bits according
|
||||
each of the subarrays is passed to a _subfilter_ that marks several of its bits according
|
||||
to some associated strategy.
|
||||
|
||||
Note that although `boost::bloom::filter` mimics the interface of a container
|
||||
and provides operations such as `insert`, it is actually _not_ a
|
||||
container: for instance, insertion does not involve the actual storage
|
||||
of the element in the data structure, but merely sets some bits in the internal
|
||||
array based on the hash value of the element.
|
||||
|
||||
== Filter Definition
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
|
||||
typename Hash = boost::hash<T>,
|
||||
typename Allocator = std::allocator<unsigned char>
|
||||
>
|
||||
class filter;
|
||||
-----
|
||||
|
||||
* `T`: Type of the elements inserted.
|
||||
* `K`: Number of buckets marked per insertion.
|
||||
* `K`: Number of subarrays marked per insertion.
|
||||
* `xref:tutorial_subfilter[Subfilter]`: Type of subfilter used.
|
||||
* `xref:tutorial_bucketsize[BucketSize`]: Size in bytes of the buckets.
|
||||
* `xref:tutorial_stride[Stride]`: Distance in bytes between the initial positions of consecutive subarrays.
|
||||
* `xref:tutorial_hash[Hash]`: A hash function for `T`.
|
||||
* `Allocator`: An allocator for `T`.
|
||||
* `Allocator`: An allocator for `unsigned char`.
|
||||
|
||||
=== `Subfilter`
|
||||
|
||||
The following subfilters can be selected, offering different compromises
|
||||
between performance and _false positive rate_ (FPR).
|
||||
See the xref:primer_variations_on_the_classical_filter[Bloom Filter Primer]
|
||||
for a general explanation of block and multiblock filters.
|
||||
A subfilter defines the local strategy for setting or checking bits within
|
||||
a selected subarray of the bit array. It determines how many bits are
|
||||
modified per operation, how they are arranged in memory, and how memory is accessed.
|
||||
The following subfilters are provided:
|
||||
|
||||
`block<Block, K'>`
|
||||
++++
|
||||
<div style="overflow-x: auto;">
|
||||
++++
|
||||
[options="header"]
|
||||
|===
|
||||
| Subfilter | Description | Pros | Cons
|
||||
|
||||
[.indent]
|
||||
Sets `K'` bits in an underlying value of the unsigned integral type `Block`
|
||||
(e.g. `unsigned char`, `uint32_t`, `uint64_t`). So,
|
||||
a `filter<T, K, block<Block, K'>>` will set `K * K'` bits per element.
|
||||
The tradeoff here is that insertion/lookup will be (much) faster than
|
||||
with `filter<T, K * K'>` while the FPR will be worse (larger).
|
||||
FPR is better the wider `Block` is.
|
||||
| `block<Block, K'>`
|
||||
| Sets `K'` bits in a subarray of type `Block`
|
||||
| Very fast access time
|
||||
| FPR is worse (higher) the smaller `Block` is
|
||||
|
||||
`multiblock<Block, K'>`
|
||||
| `multiblock<Block, K'>`
|
||||
| Sets one bit in each of the elements of a `Block[K']` subarray
|
||||
| Better (lower) FPR than `block<Block, K'>` for the same `Block` type
|
||||
| Performance may worsen if cacheline boundaries are crossed when accessing the subarray
|
||||
|
||||
[.indent]
|
||||
Instead of setting `K'` bits in a `Block` value, this subfilter sets
|
||||
one bit on each of the elements of a `Block[K']` subarray. This improves FPR
|
||||
but impacts performance with respect to `block<Block, K'>`, among other
|
||||
things because cacheline boundaries can be crossed when accessing the subarray.
|
||||
| `fast_multiblock32<K'>`
|
||||
| Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
|
||||
faster SIMD-based algorithms when SSE2, AVX2 or Neon are enabled at
|
||||
compile time
|
||||
| Always prefer it to `multiblock<uint32_t, K'>` when SSE2/AVX2/Neon is available
|
||||
| FPR is worse (higher) than `fast_multiblock64<K'>` for the same `K'`
|
||||
|
||||
`fast_multiblock32<K'>`
|
||||
| `fast_multiblock64<K'>`
|
||||
| Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
|
||||
faster SIMD-based algorithm when AVX2 is enabled at compile time
|
||||
| Always prefer it to `multiblock<uint64_t, K'>` when AVX2 is available
|
||||
| Slower than `fast_multiblock32<K'>` for the same `K'`
|
||||
|===
|
||||
++++
|
||||
</div>
|
||||
++++
|
||||
|
||||
[.indent]
|
||||
Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
|
||||
faster SIMD-based algorithms when SSE2, AVX2 or Neon are available.
|
||||
In the table above, `Block` can be an unsigned integral type
|
||||
(e.g. `unsigned char`, `uint32_t`, `uint64_t`), or
|
||||
an array of 2^`N`^ unsigned integrals (e.g. `uint64_t[8]`). In general,
|
||||
the wider `Block` is, the better (lower) the resulting FPR, but
|
||||
cache locality worsens and performance may suffer as a result.
|
||||
|
||||
`fast_multiblock64<K'>`
|
||||
Note that the total number of bits set/checked for a
|
||||
`boost::bloom::filter<T, K, _subfilter_<..., K'>>` is `K * K'`. The
|
||||
default configuration `boost::bloom::filter<T, K>` =
|
||||
`boost::bloom::filter<T, K, block<unsigned char, 1>>`, which corresponds to a
|
||||
xref:primer_implementation[classical Bloom filter], has the best (lowest) FPR among all filters
|
||||
with the same number of bits per operation, but is also the slowest: a new
|
||||
subarray is accessed for each bit set/checked. Consult the
|
||||
xref:benchmarks[benchmarks section] to see different tradeoffs between FPR and
|
||||
performance.
|
||||
|
||||
[.indent]
|
||||
Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
|
||||
faster SIMD-based algorithm when AVX2 is available.
|
||||
Once a subfilter has been selected, the parameter `K'` can be tuned
|
||||
to its optimum value (minimum FPR) if the number of elements that will be inserted is
|
||||
known in advance, as explained in a xref:configuration[dedicated section];
|
||||
otherwise, low values of `K'` will generally be faster and preferred to
|
||||
higher values as long as the resulting FPR is at acceptable levels.
|
||||
|
||||
The default configuration with `block<unsigned char,1>` corresponds to a
|
||||
xref:primer[classical Bloom filter] setting `K` bits per element uniformly
|
||||
distributed across the array.
|
||||
=== `Stride`
|
||||
|
||||
=== `BucketSize`
|
||||
As we have seen, `Subfilter` defines the subarray (`Block` in the case of
|
||||
`block<Block, K'>`, `Block[K']` for `multiblock<Block, K'>`) used by
|
||||
`boost::bloom::filter`: contiguous portions of the underlying bit array
|
||||
are then accessed and treated as those subarrays. The `Stride` parameter
|
||||
controls the distance in bytes between the initial positions of
|
||||
consecutive subarrays.
|
||||
|
||||
When the default value 0 is used, buckets have the same size as
|
||||
the _subarrays_ subfilters operate on (non-overlapping case).
|
||||
Otherwise, bucket size is smaller and subarrays spill over adjacent buckets,
|
||||
which results in an improved (lower) FPR in exchange for a possibly
|
||||
worse performance due to memory unalignment.
|
||||
When the default value 0 is used, the stride is automatically set
|
||||
to the size of the subarrays, and so there's no overlapping between them.
|
||||
If `Stride` is set to a smaller value than that size, contiguous
|
||||
subarrays superimpose on one another: the level of overlap is larger
|
||||
for smaller values of `Stride`, with maximum overlap happening when
|
||||
`Stride` is 1 byte.
|
||||
|
||||
image::stride.png[align=center, title="Two different configurations of `Stride`: (a) non-overlapping subarrays, (b) overlapping subarrays.+++<br/>+++Each subarray is associated to the stride of the same color."]
|
||||
|
||||
As it happens, overlapping improves (decreases) the resulting FPR
|
||||
with respect to the non-overlapping case, the tradeoff being that
|
||||
subarrays may not be aligned in memory, which can impact performance
|
||||
negatively.
|
||||
|
||||
=== `Hash`
|
||||
|
||||
Unlike other Bloom filter implementations requiring several hash functions per operation,
|
||||
`boost::bloom::filter` uses only one.
|
||||
By default, link:../../../container_hash/index.html[Boost.ContainerHash] is used.
|
||||
Consult this library's link:../../../container_hash/doc/html/hash.html#user[dedicated section]
|
||||
if you need to extend `boost::hash` for your own types.
|
||||
@@ -87,16 +133,16 @@ as is; otherwise, a bit-mixing post-process is applied to hash values that impro
|
||||
their statistical properties so that the resulting FPR approaches its
|
||||
theoretical limit. The hash function is determined to be of high quality
|
||||
(more precisely, to have the so-called _avalanching_ property) via the
|
||||
`link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]`
|
||||
`link:../../../container_hash/doc/html/hash.html#ref_hash_is_avalanchinghash[boost::hash_is_avalanching]`
|
||||
trait.
|
||||
|
||||
== Capacity
|
||||
|
||||
The size of the filter's internal array is specified at construction time:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source,subs="+macros,+quotes"]
|
||||
-----
|
||||
using filter = boost::bloom::filter<std::string, ...>;
|
||||
using filter = boost::bloom::filter<std::string, 8>;
|
||||
filter f(1'000'000); // array of 1'000'000 **bits**
|
||||
std::cout << f.capacity(); // >= 1'000'000
|
||||
-----
|
||||
@@ -109,7 +155,7 @@ Instead of specifying the array's capacity directly, we can let the library
|
||||
figure it out based on the number of elements we plan to insert and the
|
||||
desired FPR:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
// we'll insert 100'000 elements and want a FPR ~ 1%
|
||||
filter f(100'000, 0.01);
|
||||
@@ -118,11 +164,20 @@ filter f(100'000, 0.01);
|
||||
filter f2(filter::capacity_for(100'000, 0.01));
|
||||
-----
|
||||
|
||||
Be careful when the FPR specified is very small, as the resulting capacity
|
||||
may be too large to fit in memory:
|
||||
|
||||
[source]
|
||||
-----
|
||||
// resulting capacity ~ 1.4E12, out of memory std::bad_alloc is thrown
|
||||
filter f3(100'000, 1E-50);
|
||||
-----
|
||||
|
||||
Once a filter is constructed, its array is fixed (for instance, it won't
|
||||
grow dynamically as elements are inserted). The only way to change it is
|
||||
by assignment/swapping from a different filter, or using `reset`:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source,subs="+macros,+quotes"]
|
||||
-----
|
||||
f.reset(2'000'000); // change to 2'000'000 bits **and clears the filter**
|
||||
f.reset(100'000, 0.005); // equivalent to reset(filter::capacity_for(100'000, 0.005));
|
||||
@@ -133,10 +188,9 @@ f.reset(); // null array (capacity == 0)
|
||||
|
||||
Insertion is done in much the same way as with a traditional container:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
f.insert("hello");
|
||||
f.emplace(100, 'X'); // ~ insert(std::string(100, 'X'))
|
||||
f.insert(data.begin(), data.end());
|
||||
-----
|
||||
|
||||
@@ -145,7 +199,7 @@ storage of elements into the filter, but rather the setting of bits in the
|
||||
internal array based on the hash values of those elements.
|
||||
Lookup goes as follows:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
bool b1 = f.may_contain("hello"); // b1 is true since we actually inserted "hello"
|
||||
bool b2 = f.may_contain("bye"); // b2 is most likely false
|
||||
@@ -156,7 +210,7 @@ element has not been previously inserted, that is, it may yield false
|
||||
positives -- this is the essence of probabilistic data structures.
|
||||
`fpr_for` provides an estimation of the false positive rate:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
// we have inserted 100 elements so far, what's our FPR?
|
||||
std::cout<< filter::fpr_for(100, f.capacity());
|
||||
@@ -170,7 +224,7 @@ operation.
|
||||
Once inserted, there is no way to remove a specific element from the filter.
|
||||
We can only clear up the filter entirely:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
f.clear(); // sets all the bits in the array to zero
|
||||
-----
|
||||
@@ -180,18 +234,18 @@ f.clear(); // sets all the bits in the array to zero
|
||||
`boost::bloom::filter`+++s+++ can be combined by doing the OR logical operation
|
||||
of the bits of their arrays:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
filter f2 = ...;
|
||||
...
|
||||
f |= f2; // f and f2 must have exactly the same capacity
|
||||
-----
|
||||
|
||||
The result is equivalent to a filter "containing" both the elements
|
||||
The result is equivalent to a filter "containing" the set union of the elements
|
||||
of `f` and `f2`. AND combination, on the other hand, results in a filter
|
||||
holding the _intersection_ of the elements:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
[source]
|
||||
-----
|
||||
filter f3 = ...;
|
||||
...
|
||||
@@ -208,7 +262,7 @@ case.
|
||||
The contents of the bit array can be accessed directly with the `array`
|
||||
member function, which can be leveraged for filter serialization:
|
||||
|
||||
[listing,subs="+quotes"]
|
||||
[source]
|
||||
-----
|
||||
filter f1 = ...;
|
||||
...
|
||||
@@ -216,25 +270,29 @@ filter f1 = ...;
|
||||
// save filter
|
||||
std::ofstream out("filter.bin", std::ios::binary);
|
||||
std::size_t c1=f1.capacity();
|
||||
out.write((const char*) &c1, sizeof(c1)); // save capacity (bits)
|
||||
out.write(reinterpret_cast<const char*>(&c1), sizeof(c1)); // save capacity (bits)
|
||||
boost::span<const unsigned char> s1 = f1.array();
|
||||
out.write((const char*) s1.data(), s1.size()); // save array
|
||||
out.write(reinterpret_cast<const char*>(s1.data()), s1.size()); // save array
|
||||
out.close();
|
||||
|
||||
// load filter
|
||||
filter f2;
|
||||
std::ifstream in("filter.bin", std::ios::binary);
|
||||
std::size_t c2;
|
||||
in.read((char*) &c2, sizeof(c2));
|
||||
in.read(reinterpret_cast<char*>(&c2), sizeof(c2));
|
||||
f2.reset(c2); // restore capacity
|
||||
boost::span<unsigned char> s2 = f2.array();
|
||||
in.read((char*) s2.data(), s2.size()); // load array
|
||||
in.read(reinterpret_cast<char*>(s2.data()), s2.size()); // load array
|
||||
in.close();
|
||||
-----
|
||||
|
||||
Note that `array()` is a span over `unsigned char`+++s+++ whereas
|
||||
capacities are measured in bits, so `array().size()` is
|
||||
`capacity() / CHAR_BIT`.
|
||||
`capacity() / CHAR_BIT`. If you load a serialized filter in a computer
|
||||
other than the one where it was saved, take into account that
|
||||
the CPU architectures at each end must have the same
|
||||
https://en.wikipedia.org/wiki/Endianness[endianness^] for the
|
||||
reconstruction to work.
|
||||
|
||||
== Debugging
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 5.3 KiB |
|
Before Width: | Height: | Size: 2.9 KiB After Width: | Height: | Size: 5.2 KiB |
|
Before Width: | Height: | Size: 4.6 KiB After Width: | Height: | Size: 6.5 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 49 KiB |
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 16 KiB |
BIN
doc/img/stride.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
@@ -8,12 +8,12 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<meta http-equiv="refresh" content="0; URL=html/index.html">
|
||||
<meta http-equiv="refresh" content="0; URL=html/bloom.html">
|
||||
<title>Boost.Bloom Documentation</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
Automatic redirection failed, please go to
|
||||
<a href="html/index.html">html/index.html</a>
|
||||
<a href="html/bloom.html">html/bloom.html</a>
|
||||
</body>
|
||||
</html>
|
||||
@@ -5,12 +5,14 @@
|
||||
#
|
||||
# See http://www.boost.org/libs/bloom for library home page.
|
||||
|
||||
import config : requires ;
|
||||
|
||||
project
|
||||
: requirements
|
||||
# <library>/boost/bloom//boost_bloom
|
||||
<cxxstd>11
|
||||
[ requires cxx11_noexcept ] # used as a proxy for C++11 support
|
||||
;
|
||||
|
||||
exe basic : basic.cpp ;
|
||||
exe genome : genome.cpp : <cxxstd>17 ;
|
||||
exe genome : genome.cpp : [ requires cxx17_if_constexpr ] ;
|
||||
exe serialization : serialization.cpp ;
|
||||
@@ -8,7 +8,7 @@
|
||||
* See https://www.boost.org/libs/bloom for library home page.
|
||||
*/
|
||||
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom.hpp>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
@@ -41,4 +41,7 @@ int main()
|
||||
if(f.may_contain("bye")) { /* likely false */
|
||||
std::cout << "false positive\n";
|
||||
}
|
||||
else {
|
||||
std::cout << "everything worked as expected\n";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include <array>
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom/fast_multiblock32.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@@ -29,7 +29,7 @@ struct k_mer
|
||||
{
|
||||
static_assert(
|
||||
K >= 0 &&
|
||||
2 * K <= sizeof(boost::uint64_t) * CHAR_BIT);
|
||||
2 * K <= sizeof(std::uint64_t) * CHAR_BIT);
|
||||
|
||||
static constexpr std::size_t size()
|
||||
{
|
||||
@@ -45,8 +45,8 @@ struct k_mer
|
||||
|
||||
k_mer& operator+=(char n)
|
||||
{
|
||||
static constexpr boost::uint64_t mask=
|
||||
(((boost::uint64_t)1) << (2 * size())) - 1;
|
||||
static constexpr std::uint64_t mask=
|
||||
(((std::uint64_t)1) << (2 * size())) - 1;
|
||||
|
||||
data <<= 2;
|
||||
data &= mask;
|
||||
@@ -54,7 +54,7 @@ struct k_mer
|
||||
return *this;
|
||||
}
|
||||
|
||||
boost::uint64_t data = 0;
|
||||
std::uint64_t data = 0;
|
||||
|
||||
using table_type=std::array<unsigned char, UCHAR_MAX>;
|
||||
|
||||
@@ -71,20 +71,28 @@ struct k_mer
|
||||
template<std::size_t N>
|
||||
std::size_t hash_value(const k_mer<N>& km)
|
||||
{
|
||||
if constexpr (sizeof(std::size_t) >= sizeof(boost::uint64_t)) {
|
||||
/* k_mer::data is 8 bytes wide. We use it directly as the associated
|
||||
* hash value in 64-bit mode, as std::size_t is the same size; in 32-bit
|
||||
* mode, we XOR the high and low portions of data to make it fit into
|
||||
* a std::size_t.
|
||||
*/
|
||||
|
||||
if constexpr (sizeof(std::size_t) >= sizeof(std::uint64_t)) {
|
||||
return (std::size_t)km.data;
|
||||
}
|
||||
else{
|
||||
else{ /* 32-bit mode */
|
||||
return (std::size_t)(km.data ^ (km.data >> 32));
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert all the k-mers of a given genome in a boost::bloom::filter.
|
||||
* Assumed format is FASTA with A, C, G, T.
|
||||
* https://en.wikipedia.org/wiki/FASTA_format
|
||||
*/
|
||||
|
||||
using genome_filter = boost::bloom::filter<
|
||||
k_mer<20>, 1, boost::bloom::fast_multiblock32<8> >;
|
||||
k_mer<20>, /* using k-mers of length 20 */
|
||||
1, boost::bloom::fast_multiblock32<8> >;
|
||||
|
||||
genome_filter make_genome_filter(const char* filename)
|
||||
{
|
||||
@@ -93,7 +101,11 @@ genome_filter make_genome_filter(const char* filename)
|
||||
std::ifstream in(filename, std::ios::ate); /* open at end to tell size */
|
||||
if(!in) throw std::runtime_error("can't open file");
|
||||
|
||||
/* number of k-mers ~ length of the genome, FPR = 1% */
|
||||
/* As a rough estimation, we assume that the number of k-mers
|
||||
* is approximately equal to the length of the genome --this is
|
||||
* overpessimistic due to the likely presence of duplicate k-mers.
|
||||
* We set FPR = 1%.
|
||||
*/
|
||||
|
||||
genome_filter f((std::size_t)in.tellg(), 0.01);
|
||||
in.seekg(0);
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/core/detail/splitmix64.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@@ -24,7 +24,7 @@ struct uuid_generator
|
||||
boost::uuids::uuid operator()()
|
||||
{
|
||||
std::uint8_t data[16];
|
||||
boost::uint64_t x = rng();
|
||||
std::uint64_t x = rng();
|
||||
std::memcpy(&data[0], &x, sizeof(x));
|
||||
x = rng();
|
||||
std::memcpy(&data[8], &x, sizeof(x));
|
||||
@@ -36,7 +36,7 @@ struct uuid_generator
|
||||
};
|
||||
|
||||
using filter = boost::bloom::filter<
|
||||
boost::uuids::uuid, 1, boost::bloom::multiblock<boost::uint64_t, 8> >;
|
||||
boost::uuids::uuid, 1, boost::bloom::multiblock<std::uint64_t, 8> >;
|
||||
|
||||
static constexpr std::size_t num_elements = 10000;
|
||||
|
||||
@@ -54,19 +54,19 @@ void save_filter(const filter& f, const char* filename)
|
||||
{
|
||||
std::ofstream out(filename, std::ios::binary | std::ios::trunc);
|
||||
std::size_t c=f.capacity();
|
||||
out.write((const char*) &c, sizeof(c)); /* save capacity (bits) */
|
||||
out.write(reinterpret_cast<const char*>(&c), sizeof(c)); /* save capacity (bits) */
|
||||
auto s = f.array();
|
||||
out.write((const char*) s.data(), s.size()); /* save array */
|
||||
out.write(reinterpret_cast<const char*>(s.data()), s.size()); /* save array */
|
||||
}
|
||||
|
||||
filter load_filter(const char* filename)
|
||||
{
|
||||
std::ifstream in(filename, std::ios::binary);
|
||||
std::size_t c;
|
||||
in.read((char*) &c, sizeof(c));
|
||||
in.read(reinterpret_cast<char*>(&c), sizeof(c));
|
||||
filter f(c);
|
||||
auto s = f.array();
|
||||
in.read((char*) s.data(), s.size()); /* load array */
|
||||
in.read(reinterpret_cast<char*>(s.data()), s.size()); /* load array */
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,12 +12,12 @@ See https://www.boost.org/libs/bloom for library home page.
|
||||
<Type Name="boost::bloom::filter<*>" Inheritable="false">
|
||||
<Intrinsic Name="core" Expression="*static_cast<super*>(this)" />
|
||||
<Intrinsic Name="has_array" Expression="core().ar.data!=nullptr" />
|
||||
<Intrinsic Name="data" Expression="has_array()?core().ar.buckets:nullptr" />
|
||||
<Intrinsic Name="data" Expression="has_array()?core().ar.array:nullptr" />
|
||||
<Intrinsic
|
||||
Name="array_size"
|
||||
Expression="
|
||||
has_array()?
|
||||
core().hs.rng*super::bucket_size+(super::used_value_size-super::bucket_size):
|
||||
core().hs.rng*super::stride+(super::used_value_size-super::stride):
|
||||
0"
|
||||
/>
|
||||
<Intrinsic Name="capacity" Expression="array_size()*8" />
|
||||
|
||||
18
include/boost/bloom.hpp
Normal file
@@ -0,0 +1,18 @@
|
||||
/* Copyright 2025 Joaquin M Lopez Munoz.
|
||||
* Distributed under the Boost Software License, Version 1.0.
|
||||
* (See accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
* See https://www.boost.org/libs/bloom for library home page.
|
||||
*/
|
||||
|
||||
#ifndef BOOST_BLOOM_HPP
|
||||
#define BOOST_BLOOM_HPP
|
||||
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom/block.hpp>
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/bloom/fast_multiblock32.hpp>
|
||||
#include <boost/bloom/fast_multiblock64.hpp>
|
||||
|
||||
#endif
|
||||
@@ -10,36 +10,61 @@
|
||||
#define BOOST_BLOOM_BLOCK_HPP
|
||||
|
||||
#include <boost/bloom/detail/block_base.hpp>
|
||||
#include <boost/bloom/detail/block_ops.hpp>
|
||||
#include <boost/bloom/detail/block_fpr_base.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block,std::size_t K>
|
||||
struct block:
|
||||
private detail::block_base<Block,K>,public detail::block_fpr_base<K>
|
||||
public detail::block_fpr_base<K>,
|
||||
private detail::block_base<Block,K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=Block;
|
||||
|
||||
static inline void mark(value_type& x,boost::uint64_t hash)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
loop(hash,[&](boost::uint64_t h){x|=Block(1)<<(h&mask);});
|
||||
loop(hash,[&](std::uint64_t h){block_ops::set(x,h&mask);});
|
||||
}
|
||||
|
||||
static inline bool check(const value_type& x,boost::uint64_t hash)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
Block fp=0;
|
||||
mark(fp,hash);
|
||||
return (x&fp)==fp;
|
||||
return check(x,hash,typename block_ops::is_extended_block{});
|
||||
}
|
||||
|
||||
private:
|
||||
using super=detail::block_base<Block,K>;
|
||||
using super::mask;
|
||||
using super::loop;
|
||||
using super::loop_while;
|
||||
using block_ops=detail::block_ops<Block>;
|
||||
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline bool check(
|
||||
const value_type& x,std::uint64_t hash,
|
||||
std::false_type /* non-extended block */)
|
||||
{
|
||||
Block fp;
|
||||
block_ops::zero(fp);
|
||||
mark(fp,hash);
|
||||
return block_ops::testc(x,fp);
|
||||
}
|
||||
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline bool check(
|
||||
const value_type& x,std::uint64_t hash,
|
||||
std::true_type /* extended block */)
|
||||
{
|
||||
return loop_while(hash,[&](std::uint64_t h){
|
||||
return block_ops::get_at_lsb(x,h&mask)&1;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace bloom */
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/bloom/detail/constexpr_bit_width.hpp>
|
||||
#include <boost/bloom/detail/mulx64.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/bloom/detail/type_traits.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
@@ -24,23 +25,31 @@ namespace detail{
|
||||
#pragma warning(disable:4714) /* marked as __forceinline not inlined */
|
||||
#endif
|
||||
|
||||
// TODO: describe
|
||||
/* Validates type Block and provides common looping facilities for block
|
||||
* and multiblock.
|
||||
*/
|
||||
|
||||
template<typename Block,std::size_t K>
|
||||
struct block_base
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
static constexpr std::size_t hash_width=sizeof(boost::uint64_t)*CHAR_BIT;
|
||||
static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
|
||||
static_assert(
|
||||
(block_width&(block_width-1))==0,
|
||||
"Block's size in bits must be a power of two");
|
||||
is_unsigned_integral_or_extended_unsigned_integral<Block>::value||
|
||||
(
|
||||
is_array_of<
|
||||
Block,is_unsigned_integral_or_extended_unsigned_integral>::value&&
|
||||
is_power_of_two<array_size<Block>::value>::value
|
||||
),
|
||||
"Block must be an (extended) unsigned integral type or an array T[N] "
|
||||
"with T an (extended) unsigned integral type and N a power of two");
|
||||
static constexpr std::size_t k=K;
|
||||
static constexpr std::size_t hash_width=sizeof(std::uint64_t)*CHAR_BIT;
|
||||
static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
|
||||
static constexpr std::size_t mask=block_width-1;
|
||||
static constexpr std::size_t shift=constexpr_bit_width(mask);
|
||||
static constexpr std::size_t rehash_k=(hash_width-shift)/shift;
|
||||
|
||||
template<typename F>
|
||||
static BOOST_FORCEINLINE void loop(boost::uint64_t hash,F f)
|
||||
static BOOST_FORCEINLINE void loop(std::uint64_t hash,F f)
|
||||
{
|
||||
for(std::size_t i=0;i<k/rehash_k;++i){
|
||||
auto h=hash;
|
||||
@@ -56,6 +65,25 @@ struct block_base
|
||||
f(h);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
static BOOST_FORCEINLINE bool loop_while(std::uint64_t hash,F f)
|
||||
{
|
||||
for(std::size_t i=0;i<k/rehash_k;++i){
|
||||
auto h=hash;
|
||||
for(std::size_t j=0;j<rehash_k;++j){
|
||||
h>>=shift;
|
||||
if(!f(h))return false;
|
||||
}
|
||||
hash=detail::mulx64(hash);
|
||||
}
|
||||
auto h=hash;
|
||||
for(std::size_t i=0;i<k%rehash_k;++i){
|
||||
h>>=shift;
|
||||
if(!f(h))return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(BOOST_MSVC)
|
||||
|
||||
95
include/boost/bloom/detail/block_ops.hpp
Normal file
@@ -0,0 +1,95 @@
|
||||
/* Copyright 2025 Joaquin M Lopez Munoz.
|
||||
* Distributed under the Boost Software License, Version 1.0.
|
||||
* (See accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
* See https://www.boost.org/libs/bloom for library home page.
|
||||
*/
|
||||
|
||||
#ifndef BOOST_BLOOM_DETAIL_BLOCK_OPS_HPP
|
||||
#define BOOST_BLOOM_DETAIL_BLOCK_OPS_HPP
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
namespace detail{
|
||||
|
||||
#if defined(BOOST_MSVC)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4714) /* marked as __forceinline not inlined */
|
||||
#endif
|
||||
|
||||
/* Primary template: bit-level operations on a block held in a single value
 * of type Block. is_extended_block is std::false_type for this case; the
 * partial specialization for Block[N] sets it to std::true_type.
 */
template<typename Block>
|
||||
struct block_ops
|
||||
{
|
||||
using is_extended_block=std::false_type;
|
||||
using value_type=Block;
|
||||
|
||||
/* Resets x to zero. */
static BOOST_FORCEINLINE void zero(Block& x)
|
||||
{
|
||||
x=0;
|
||||
}
|
||||
|
||||
/* ORs Block(1)<<n into x, i.e. sets bit n of x. */
static BOOST_FORCEINLINE void set(value_type& x,std::uint64_t n)
|
||||
{
|
||||
x|=Block(1)<<n;
|
||||
}
|
||||
|
||||
/* Returns x>>n converted to int, so that bit n of x ends up in the least
 * significant bit of the result. Higher bits are not masked off here.
 */
static BOOST_FORCEINLINE int get_at_lsb(const value_type& x,std::uint64_t n)
|
||||
{
|
||||
return static_cast<int>(x>>n);
|
||||
}
|
||||
|
||||
/* ANDs res with get_at_lsb(x,n). */
static BOOST_FORCEINLINE void reduce(
|
||||
int& res,const value_type& x,std::uint64_t n)
|
||||
{
|
||||
res&=get_at_lsb(x,n);
|
||||
}
|
||||
|
||||
/* Returns true iff every bit set in y is also set in x. */
static BOOST_FORCEINLINE bool testc(const value_type& x,const value_type& y)
|
||||
{
|
||||
return (x&y)==y;
|
||||
}
|
||||
};
|
||||
|
||||
/* Partial specialization for blocks that are arrays Block[N]
 * (is_extended_block is std::true_type). A bit index n is mapped to bit
 * n/N of array element n%N. Unlike the primary template, no testc
 * operation is provided.
 */
template<typename Block,std::size_t N>
|
||||
struct block_ops<Block[N]>
|
||||
{
|
||||
using is_extended_block=std::true_type;
|
||||
using value_type=Block[N];
|
||||
|
||||
/* Sets each of the N elements of x to zero. */
static BOOST_FORCEINLINE void zero(value_type& x)
|
||||
{
|
||||
for(std::size_t i=0;i<N;++i)x[i]=0;
|
||||
}
|
||||
|
||||
/* Sets bit n/N of element x[n%N]. */
static BOOST_FORCEINLINE void set(value_type& x,std::uint64_t n)
|
||||
{
|
||||
x[n%N]|=Block(1)<<(n/N);
|
||||
}
|
||||
|
||||
/* Returns x[n%N]>>(n/N) converted to int, so that the bit addressed by n
 * ends up in the least significant bit of the result. Higher bits are not
 * masked off here.
 */
static BOOST_FORCEINLINE int get_at_lsb(const value_type& x,std::uint64_t n)
|
||||
{
|
||||
return static_cast<int>(x[n%N]>>(n/N));
|
||||
}
|
||||
|
||||
/* ANDs res with get_at_lsb(x,n). */
static BOOST_FORCEINLINE void reduce(
|
||||
int& res,const value_type& x,std::uint64_t n)
|
||||
{
|
||||
res&=get_at_lsb(x,n);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#if defined(BOOST_MSVC)
|
||||
#pragma warning(pop) /* C4714 */
|
||||
#endif
|
||||
|
||||
|
||||
} /* namespace detail */
|
||||
} /* namespace bloom */
|
||||
} /* namespace boost */
|
||||
|
||||
#endif
|
||||
@@ -17,7 +17,7 @@ namespace detail{
|
||||
|
||||
/* boost::core::bit_width is not always C++11 constexpr */
|
||||
|
||||
inline constexpr std::size_t constexpr_bit_width(std::size_t x)
|
||||
constexpr std::size_t constexpr_bit_width(std::size_t x)
|
||||
{
|
||||
return x?1+constexpr_bit_width(x>>1):0;
|
||||
}
|
||||
|
||||
@@ -19,9 +19,10 @@
|
||||
#include <boost/core/allocator_traits.hpp>
|
||||
#include <boost/core/empty_value.hpp>
|
||||
#include <boost/core/span.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/throw_exception.hpp>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
@@ -60,45 +61,46 @@ namespace detail{
|
||||
#pragma warning(disable:4714) /* marked as __forceinline not inlined */
|
||||
#endif
|
||||
|
||||
/* mcg_and_fastrange produces (pos,hash') from hash, where
|
||||
* - m=mulx64(hash,range), mulx64 denotes extended multiplication
|
||||
* - pos=high(m)
|
||||
* - hash'=low(m)
|
||||
* pos is uniformly distributed in [0,range) (see
|
||||
* https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
|
||||
* congruential generator of the form hash'<-hash*rng mod 2^64. This MCG
|
||||
* generates long cycles when the initial value of hash is odd and
|
||||
* rng = +-3 (mod 8), which is why we adjust hash and rng as seen below. As a
|
||||
* result, the low bits of hash' are of poor quality, and the least
|
||||
* significant bit in particular is always one.
|
||||
/* fastrange_and_mcg produces (pos,hash') from hash as follows:
|
||||
* - pos=high(mulx64(hash,range))
|
||||
* - hash'=c*hash (mod 2^64)
|
||||
* pos is uniformly distributed in [0,range) (see Lemire 2018
|
||||
* https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
|
||||
* congruential generator using well-behaved multipliers c from Steele and
|
||||
* Vigna 2021 https://arxiv.org/pdf/2001.05304 . To ensure the MCG generates
|
||||
* long cycles the initial value of hash is adjusted to be odd, which implies
|
||||
* that the least significant bit of hash' is always one. In general, the low bits
|
||||
* of MCG-produced values are of low quality and we don't use them downstream.
|
||||
*/
|
||||
|
||||
struct mcg_and_fastrange
|
||||
struct fastrange_and_mcg
|
||||
{
|
||||
constexpr mcg_and_fastrange(std::size_t m)noexcept:
|
||||
rng{
|
||||
m+(
|
||||
(m%8<=3)?3-(m%8):
|
||||
(m%8<=5)?5-(m%8):
|
||||
8-(m%8)+3)
|
||||
}
|
||||
{}
|
||||
constexpr fastrange_and_mcg(std::size_t m)noexcept:rng{m}{}
|
||||
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline constexpr std::size_t range()const noexcept{return (std::size_t)rng;}
|
||||
|
||||
inline void prepare_hash(boost::uint64_t& hash)const noexcept
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline void prepare_hash(std::uint64_t& hash)const noexcept
|
||||
{
|
||||
hash|=1u;
|
||||
}
|
||||
|
||||
inline std::size_t next_position(boost::uint64_t& hash)const noexcept
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline std::size_t next_position(std::uint64_t& hash)const noexcept
|
||||
{
|
||||
boost::uint64_t hi;
|
||||
hash=umul128(hash,rng,hi);
|
||||
umul128(hash,rng,hi);
|
||||
|
||||
#if ((((SIZE_MAX>>16)>>16)>>16)>>15)!=0 /* 64-bit mode (or higher) */
|
||||
hash*=0xf1357aea2e62a9c5ull;
|
||||
#else /* 32-bit mode */
|
||||
hash*=0xe817fb2d;
|
||||
#endif
|
||||
return (std::size_t)hi;
|
||||
}
|
||||
|
||||
boost::uint64_t rng;
|
||||
std::uint64_t rng;
|
||||
};
|
||||
|
||||
/* used_value_size<Subfilter>::value is Subfilter::used_value_size if it
|
||||
@@ -124,7 +126,7 @@ struct used_value_size<
|
||||
|
||||
/* GCD with x,p > 1, p a power of two */
|
||||
|
||||
inline constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
|
||||
constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
|
||||
{
|
||||
/* x&-x: maximum power of two dividing x */
|
||||
return (x&(0-x))<p?(x&(0-x)):p;
|
||||
@@ -132,7 +134,7 @@ inline constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
|
||||
|
||||
/* std::ldexp is not constexpr in C++11 */
|
||||
|
||||
inline constexpr double constexpr_ldexp_1_positive(int exp)
|
||||
constexpr double constexpr_ldexp_1_positive(int exp)
|
||||
{
|
||||
return exp==0?1.0:2.0*constexpr_ldexp_1_positive(exp-1);
|
||||
}
|
||||
@@ -140,7 +142,7 @@ inline constexpr double constexpr_ldexp_1_positive(int exp)
|
||||
struct filter_array
|
||||
{
|
||||
unsigned char* data;
|
||||
unsigned char* buckets; /* adjusted from data for proper alignment */
|
||||
unsigned char* array; /* adjusted from data for proper alignment */
|
||||
};
|
||||
|
||||
struct if_constexpr_void_else{void operator()()const{}};
|
||||
@@ -170,7 +172,7 @@ template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr>
|
||||
void swap_if(T&,T&){}
|
||||
|
||||
template<
|
||||
std::size_t K,typename Subfilter,std::size_t BucketSize,typename Allocator
|
||||
std::size_t K,typename Subfilter,std::size_t Stride,typename Allocator
|
||||
>
|
||||
class filter_core:empty_value<Allocator,0>
|
||||
{
|
||||
@@ -192,23 +194,22 @@ private:
|
||||
detail::used_value_size<subfilter>::value;
|
||||
|
||||
public:
|
||||
static constexpr std::size_t bucket_size=
|
||||
BucketSize?BucketSize:used_value_size;
|
||||
static constexpr std::size_t stride=Stride?Stride:used_value_size;
|
||||
static_assert(
|
||||
bucket_size<=used_value_size,"BucketSize can't exceed the block size");
|
||||
stride<=used_value_size,"Stride can't exceed the block size");
|
||||
|
||||
private:
|
||||
static constexpr std::size_t tail_size=sizeof(block_type)-bucket_size;
|
||||
static constexpr std::size_t tail_size=sizeof(block_type)-stride;
|
||||
static constexpr bool are_blocks_aligned=
|
||||
(bucket_size%alignof(block_type)==0);
|
||||
(stride%alignof(block_type)==0);
|
||||
static constexpr std::size_t cacheline=64; /* unknown at compile time */
|
||||
static constexpr std::size_t initial_alignment=
|
||||
are_blocks_aligned?
|
||||
alignof(block_type)>cacheline?alignof(block_type):cacheline:
|
||||
1;
|
||||
static constexpr std::size_t prefetched_cachelines=
|
||||
1+(block_size+cacheline-1-gcd_pow2(bucket_size,cacheline))/cacheline;
|
||||
using hash_strategy=detail::mcg_and_fastrange;
|
||||
1+(block_size+cacheline-1-gcd_pow2(stride,cacheline))/cacheline;
|
||||
using hash_strategy=detail::fastrange_and_mcg;
|
||||
|
||||
public:
|
||||
using allocator_type=Allocator;
|
||||
@@ -362,15 +363,15 @@ public:
|
||||
|
||||
boost::span<unsigned char> array()noexcept
|
||||
{
|
||||
return {ar.data?ar.buckets:nullptr,capacity()/CHAR_BIT};
|
||||
return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
|
||||
}
|
||||
|
||||
boost::span<const unsigned char> array()const noexcept
|
||||
{
|
||||
return {ar.data?ar.buckets:nullptr,capacity()/CHAR_BIT};
|
||||
return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void insert(boost::uint64_t hash)
|
||||
BOOST_FORCEINLINE void insert(std::uint64_t hash)
|
||||
{
|
||||
hs.prepare_hash(hash);
|
||||
for(auto n=k;n--;){
|
||||
@@ -438,7 +439,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE bool may_contain(boost::uint64_t hash)const
|
||||
BOOST_FORCEINLINE bool may_contain(std::uint64_t hash)const
|
||||
{
|
||||
hs.prepare_hash(hash);
|
||||
#if 1
|
||||
@@ -464,7 +465,7 @@ public:
|
||||
{
|
||||
if(x.range()!=y.range())return false;
|
||||
else if(!x.ar.data)return true;
|
||||
else return std::memcmp(x.ar.buckets,y.ar.buckets,x.used_array_size())==0;
|
||||
else return std::memcmp(x.ar.array,y.ar.array,x.used_array_size())==0;
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -475,25 +476,25 @@ private:
|
||||
|
||||
static std::size_t requested_range(std::size_t m)
|
||||
{
|
||||
if(m>(used_value_size-bucket_size)*CHAR_BIT){
|
||||
if(m>(used_value_size-stride)*CHAR_BIT){
|
||||
/* ensures filter_core{f.capacity()}.capacity()==f.capacity() */
|
||||
m-=(used_value_size-bucket_size)*CHAR_BIT;
|
||||
m-=(used_value_size-stride)*CHAR_BIT;
|
||||
}
|
||||
return
|
||||
(std::numeric_limits<std::size_t>::max)()-m>=bucket_size*CHAR_BIT-1?
|
||||
(m+bucket_size*CHAR_BIT-1)/(bucket_size*CHAR_BIT):
|
||||
m/(bucket_size*CHAR_BIT);
|
||||
(std::numeric_limits<std::size_t>::max)()-m>=stride*CHAR_BIT-1?
|
||||
(m+stride*CHAR_BIT-1)/(stride*CHAR_BIT):
|
||||
m/(stride*CHAR_BIT);
|
||||
}
|
||||
|
||||
static filter_array new_array(allocator_type& al,std::size_t rng)
|
||||
{
|
||||
if(rng){
|
||||
auto p=allocator_allocate(al,space_for(rng));
|
||||
return {p,buckets_for(p)};
|
||||
return {p,array_for(p)};
|
||||
}
|
||||
else{
|
||||
/* To avoid dynamic allocation for zero capacity or moved-from filters,
|
||||
* we point buckets to a statically allocated dummy array with all bits
|
||||
* we point array to a statically allocated dummy array with all bits
|
||||
* set to one. This is good for read operations but not so for write
|
||||
* operations, where we need to resort to a null check on
|
||||
* filter_array::data.
|
||||
@@ -502,7 +503,7 @@ private:
|
||||
static struct {unsigned char x=-1;}
|
||||
dummy[space_for(hash_strategy{0}.range())];
|
||||
|
||||
return {nullptr,buckets_for(reinterpret_cast<unsigned char*>(&dummy))};
|
||||
return {nullptr,array_for(reinterpret_cast<unsigned char*>(&dummy))};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -513,13 +514,13 @@ private:
|
||||
|
||||
void clear_bytes()noexcept
|
||||
{
|
||||
std::memset(ar.buckets,0,used_array_size());
|
||||
std::memset(ar.array,0,used_array_size());
|
||||
}
|
||||
|
||||
void copy_bytes(const filter_core& x)
|
||||
{
|
||||
BOOST_ASSERT(range()==x.range());
|
||||
std::memcpy(ar.buckets,x.ar.buckets,used_array_size());
|
||||
std::memcpy(ar.array,x.ar.array,used_array_size());
|
||||
}
|
||||
|
||||
std::size_t range()const noexcept
|
||||
@@ -529,14 +530,14 @@ private:
|
||||
|
||||
static constexpr std::size_t space_for(std::size_t rng)noexcept
|
||||
{
|
||||
return (initial_alignment-1)+rng*bucket_size+tail_size;
|
||||
return (initial_alignment-1)+rng*stride+tail_size;
|
||||
}
|
||||
|
||||
static unsigned char* buckets_for(unsigned char* p)noexcept
|
||||
static unsigned char* array_for(unsigned char* p)noexcept
|
||||
{
|
||||
return p+
|
||||
(boost::uintptr_t(initial_alignment)-
|
||||
boost::uintptr_t(p))%initial_alignment;
|
||||
(std::uintptr_t(initial_alignment)-
|
||||
std::uintptr_t(p))%initial_alignment;
|
||||
}
|
||||
|
||||
std::size_t used_array_size()const noexcept
|
||||
@@ -546,7 +547,7 @@ private:
|
||||
|
||||
static std::size_t used_array_size(std::size_t rng)noexcept
|
||||
{
|
||||
return rng?rng*bucket_size+(used_value_size-bucket_size):0;
|
||||
return rng?rng*stride+(used_value_size-stride):0;
|
||||
}
|
||||
|
||||
static std::size_t unadjusted_capacity_for(std::size_t n,double fpr)
|
||||
@@ -609,7 +610,7 @@ private:
|
||||
|
||||
static double fpr_for_c(double c)
|
||||
{
|
||||
constexpr std::size_t w=(2*used_value_size-bucket_size)*CHAR_BIT;
|
||||
constexpr std::size_t w=(2*used_value_size-stride)*CHAR_BIT;
|
||||
const double lambda=w*k/c;
|
||||
const double loglambda=std::log(lambda);
|
||||
double res=0.0;
|
||||
@@ -639,20 +640,20 @@ private:
|
||||
std::pow(1.0-std::exp(-(double)k_total/c),(double)k_total));
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE bool get(const unsigned char* p,boost::uint64_t hash)const
|
||||
BOOST_FORCEINLINE bool get(const unsigned char* p,std::uint64_t hash)const
|
||||
{
|
||||
return get(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE bool get(
|
||||
const unsigned char* p,boost::uint64_t hash,
|
||||
const unsigned char* p,std::uint64_t hash,
|
||||
std::true_type /* blocks aligned */)const
|
||||
{
|
||||
return subfilter::check(*reinterpret_cast<const block_type*>(p),hash);
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE bool get(
|
||||
const unsigned char* p,boost::uint64_t hash,
|
||||
const unsigned char* p,std::uint64_t hash,
|
||||
std::false_type /* blocks not aligned */)const
|
||||
{
|
||||
block_type x;
|
||||
@@ -660,20 +661,20 @@ private:
|
||||
return subfilter::check(x,hash);
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void set(unsigned char* p,boost::uint64_t hash)
|
||||
BOOST_FORCEINLINE void set(unsigned char* p,std::uint64_t hash)
|
||||
{
|
||||
return set(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void set(
|
||||
unsigned char* p,boost::uint64_t hash,
|
||||
unsigned char* p,std::uint64_t hash,
|
||||
std::true_type /* blocks aligned */)
|
||||
{
|
||||
subfilter::mark(*reinterpret_cast<block_type*>(p),hash);
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void set(
|
||||
unsigned char* p,boost::uint64_t hash,
|
||||
unsigned char* p,std::uint64_t hash,
|
||||
std::false_type /* blocks not aligned */)
|
||||
{
|
||||
block_type x;
|
||||
@@ -683,9 +684,9 @@ private:
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE
|
||||
unsigned char* next_element(boost::uint64_t& h)noexcept
|
||||
unsigned char* next_element(std::uint64_t& h)noexcept
|
||||
{
|
||||
auto p=ar.buckets+hs.next_position(h)*bucket_size;
|
||||
auto p=ar.array+hs.next_position(h)*stride;
|
||||
for(std::size_t i=0;i<prefetched_cachelines;++i){
|
||||
BOOST_BLOOM_PREFETCH_WRITE((unsigned char*)p+i*cacheline);
|
||||
}
|
||||
@@ -693,9 +694,9 @@ private:
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE
|
||||
const unsigned char* next_element(boost::uint64_t& h)const noexcept
|
||||
const unsigned char* next_element(std::uint64_t& h)const noexcept
|
||||
{
|
||||
auto p=ar.buckets+hs.next_position(h)*bucket_size;
|
||||
auto p=ar.array+hs.next_position(h)*stride;
|
||||
for(std::size_t i=0;i<prefetched_cachelines;++i){
|
||||
BOOST_BLOOM_PREFETCH((unsigned char*)p+i*cacheline);
|
||||
}
|
||||
@@ -708,9 +709,9 @@ private:
|
||||
if(range()!=x.range()){
|
||||
BOOST_THROW_EXCEPTION(std::invalid_argument("incompatible filters"));
|
||||
}
|
||||
auto first0=ar.buckets,
|
||||
auto first0=ar.array,
|
||||
last0=first0+used_array_size(),
|
||||
first1=x.ar.buckets;
|
||||
first1=x.ar.array;
|
||||
while(first0!=last0)f(*first0++,*first1++);
|
||||
}
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <boost/bloom/detail/multiblock_fpr_base.hpp>
|
||||
#include <boost/bloom/detail/mulx64.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
@@ -29,9 +29,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=__m256i[(k+7)/8];
|
||||
static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
|
||||
static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;
|
||||
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
mark_m256i(x[i],hash,8);
|
||||
@@ -42,7 +42,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
}
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
if(!check_m256i(x[i],hash,8))return false;
|
||||
@@ -56,7 +56,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
|
||||
private:
|
||||
static BOOST_FORCEINLINE __m256i make_m256i(
|
||||
boost::uint64_t hash,std::size_t kp)
|
||||
std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
const __m256i ones[8]={
|
||||
_mm256_set_epi32(0,0,0,0,0,0,0,1),
|
||||
@@ -76,14 +76,14 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE void mark_m256i(
|
||||
__m256i& x,boost::uint64_t hash,std::size_t kp)
|
||||
__m256i& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
__m256i h=make_m256i(hash,kp);
|
||||
x=_mm256_or_si256(x,h);
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check_m256i(
|
||||
const __m256i& x,boost::uint64_t hash,std::size_t kp)
|
||||
const __m256i& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
__m256i h=make_m256i(hash,kp);
|
||||
return _mm256_testc_si256(x,h);
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <boost/bloom/detail/mulx64.hpp>
|
||||
#include <boost/bloom/detail/neon.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
@@ -28,11 +28,11 @@ namespace bloom{
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define BOOST_BLOOM_INIT_U32X4(w,x,y,z) \
|
||||
{(boost::uint32_t(w)+(unsigned long long(x)<<32)), \
|
||||
(boost::uint32_t(y)+(unsigned long long(z)<<32))}
|
||||
{(std::uint32_t(w)+(unsigned long long(x)<<32)), \
|
||||
(std::uint32_t(y)+(unsigned long long(z)<<32))}
|
||||
#else
|
||||
#define BOOST_BLOOM_INIT_U32X4(w,x,y,z) \
|
||||
{boost::uint32_t(w),boost::uint32_t(x),boost::uint32_t(y),boost::uint32_t(z)}
|
||||
{std::uint32_t(w),std::uint32_t(x),std::uint32_t(y),std::uint32_t(z)}
|
||||
#endif
|
||||
|
||||
#define BOOST_BLOOM_INIT_U32X4X2(w0,x0,y0,z0,w1,x1,y1,z1) \
|
||||
@@ -43,9 +43,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=uint32x4x2_t[(k+7)/8];
|
||||
static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
|
||||
static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;
|
||||
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
mark_uint32x4x2_t(x[i],hash,8);
|
||||
@@ -56,7 +56,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
}
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
if(!check_uint32x4x2_t(x[i],hash,8))return false;
|
||||
@@ -70,7 +70,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
|
||||
private:
|
||||
static BOOST_FORCEINLINE uint32x4x2_t make_uint32x4x2_t(
|
||||
boost::uint64_t hash,std::size_t kp)
|
||||
std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
static const uint32x4x2_t ones[8]={
|
||||
BOOST_BLOOM_INIT_U32X4X2(1,0,0,0,0,0,0,0),
|
||||
@@ -101,7 +101,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE void mark_uint32x4x2_t(
|
||||
uint32x4x2_t& x,boost::uint64_t hash,std::size_t kp)
|
||||
uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
|
||||
x.val[0]=vorrq_u32(x.val[0],h.val[0]);
|
||||
@@ -109,7 +109,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check_uint32x4x2_t(
|
||||
const uint32x4x2_t& x,boost::uint64_t hash,std::size_t kp)
|
||||
const uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
|
||||
uint32x4_t lo=vtstq_u32(x.val[0],h.val[0]);
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <boost/bloom/detail/mulx64.hpp>
|
||||
#include <boost/bloom/detail/sse2.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
@@ -35,6 +35,7 @@ struct m128ix2
|
||||
__m128i lo,hi;
|
||||
};
|
||||
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline int mm_testc_si128(__m128i x,__m128i y)
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
@@ -51,9 +52,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=detail::m128ix2[(k+7)/8];
|
||||
static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
|
||||
static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;
|
||||
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
mark_m128ix2(x[i],hash,8);
|
||||
@@ -64,7 +65,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
}
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(std::size_t i=0;i<k/8;++i){
|
||||
if(!check_m128ix2(x[i],hash,8))return false;
|
||||
@@ -78,10 +79,10 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
|
||||
|
||||
private:
|
||||
static BOOST_FORCEINLINE detail::m128ix2 make_m128ix2(
|
||||
boost::uint64_t hash,std::size_t kp)
|
||||
std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
const boost::uint32_t mask=boost::uint32_t(31)<<23,
|
||||
exp=boost::uint32_t(127)<<23;
|
||||
const std::uint32_t mask=std::uint32_t(31)<<23,
|
||||
exp=std::uint32_t(127)<<23;
|
||||
const __m128i exps[4]={
|
||||
_mm_set_epi32( 0 , 0 , 0 ,exp),
|
||||
_mm_set_epi32( 0 , 0 ,exp,exp),
|
||||
@@ -113,7 +114,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE void mark_m128ix2(
|
||||
detail::m128ix2& x,boost::uint64_t hash,std::size_t kp)
|
||||
detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
detail::m128ix2 h=make_m128ix2(hash,kp);
|
||||
x.lo=_mm_or_si128(x.lo,h.lo);
|
||||
@@ -121,7 +122,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check_m128ix2(
|
||||
const detail::m128ix2& x,boost::uint64_t hash,std::size_t kp)
|
||||
const detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
detail::m128ix2 h=make_m128ix2(hash,kp);
|
||||
auto res=detail::mm_testc_si128(x.lo,h.lo);
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <boost/bloom/detail/multiblock_fpr_base.hpp>
|
||||
#include <boost/bloom/detail/mulx64.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
@@ -38,9 +38,9 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=detail::m256ix2[(k+7)/8];
|
||||
static constexpr std::size_t used_value_size=sizeof(boost::uint64_t)*k;
|
||||
static constexpr std::size_t used_value_size=sizeof(std::uint64_t)*k;
|
||||
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(int i=0;i<k/8;++i){
|
||||
mark_m256ix2(x[i],hash,8);
|
||||
@@ -51,7 +51,7 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>
|
||||
}
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
|
||||
static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
for(int i=0;i<k/8;++i){
|
||||
if(!check_m256ix2(x[i],hash,8))return false;
|
||||
@@ -65,7 +65,7 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>
|
||||
|
||||
private:
|
||||
static BOOST_FORCEINLINE detail::m256ix2 make_m256ix2(
|
||||
boost::uint64_t hash,std::size_t kp)
|
||||
std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
const detail::m256ix2 ones[8]={
|
||||
{_mm256_set_epi64x(0,0,0,1),_mm256_set_epi64x(0,0,0,0)},
|
||||
@@ -92,7 +92,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE void mark_m256ix2(
|
||||
detail::m256ix2& x,boost::uint64_t hash,std::size_t kp)
|
||||
detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
detail::m256ix2 h=make_m256ix2(hash,kp);
|
||||
x.lo=_mm256_or_si256(x.lo,h.lo);
|
||||
@@ -100,7 +100,7 @@ private:
|
||||
}
|
||||
|
||||
static BOOST_FORCEINLINE bool check_m256ix2(
|
||||
const detail::m256ix2& x,boost::uint64_t hash,std::size_t kp)
|
||||
const detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
|
||||
{
|
||||
detail::m256ix2 h=make_m256ix2(hash,kp);
|
||||
auto res=_mm256_testc_si256(x.lo,h.lo);
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
#ifndef BOOST_BLOOM_DETAIL_MULX64_HPP
|
||||
#define BOOST_BLOOM_DETAIL_MULX64_HPP
|
||||
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#if defined(_MSC_VER)&&!defined(__clang__)
|
||||
#include <intrin.h>
|
||||
@@ -24,16 +24,16 @@ namespace detail{
|
||||
|
||||
#if defined(_MSC_VER)&&defined(_M_X64)&&!defined(__clang__)
|
||||
|
||||
__forceinline boost::uint64_t umul128(
|
||||
boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
|
||||
__forceinline std::uint64_t umul128(
|
||||
std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
|
||||
{
|
||||
return _umul128(x,y,&hi);
|
||||
}
|
||||
|
||||
#elif defined(_MSC_VER)&&defined(_M_ARM64)&&!defined(__clang__)
|
||||
|
||||
__forceinline boost::uint64_t umul128(
|
||||
boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
|
||||
__forceinline std::uint64_t umul128(
|
||||
std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
|
||||
{
|
||||
hi=__umulh(x,y);
|
||||
return x*y;
|
||||
@@ -41,40 +41,42 @@ __forceinline boost::uint64_t umul128(
|
||||
|
||||
#elif defined(__SIZEOF_INT128__)
|
||||
|
||||
inline boost::uint64_t umul128(
|
||||
boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline std::uint64_t umul128(
|
||||
std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
|
||||
{
|
||||
__uint128_t r=(__uint128_t)x*y;
|
||||
hi=(boost::uint64_t)(r>>64);
|
||||
return (boost::uint64_t)r;
|
||||
hi=(std::uint64_t)(r>>64);
|
||||
return (std::uint64_t)r;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
inline boost::uint64_t umul128(
|
||||
boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline std::uint64_t umul128(
|
||||
std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
|
||||
{
|
||||
boost::uint64_t x1=(boost::uint32_t)x;
|
||||
boost::uint64_t x2=x >> 32;
|
||||
std::uint64_t x1=(std::uint32_t)x;
|
||||
std::uint64_t x2=x >> 32;
|
||||
|
||||
boost::uint64_t y1=(boost::uint32_t)y;
|
||||
boost::uint64_t y2=y >> 32;
|
||||
std::uint64_t y1=(std::uint32_t)y;
|
||||
std::uint64_t y2=y >> 32;
|
||||
|
||||
boost::uint64_t r3=x2*y2;
|
||||
std::uint64_t r3=x2*y2;
|
||||
|
||||
boost::uint64_t r2a=x1*y2;
|
||||
std::uint64_t r2a=x1*y2;
|
||||
|
||||
r3+=r2a>>32;
|
||||
|
||||
boost::uint64_t r2b=x2*y1;
|
||||
std::uint64_t r2b=x2*y1;
|
||||
|
||||
r3+=r2b>>32;
|
||||
|
||||
boost::uint64_t r1=x1*y1;
|
||||
std::uint64_t r1=x1*y1;
|
||||
|
||||
boost::uint64_t r2=(r1>>32)+(boost::uint32_t)r2a+(boost::uint32_t)r2b;
|
||||
std::uint64_t r2=(r1>>32)+(std::uint32_t)r2a+(std::uint32_t)r2b;
|
||||
|
||||
r1=(r2<<32)+(boost::uint32_t)r1;
|
||||
r1=(r2<<32)+(std::uint32_t)r1;
|
||||
r3+=r2>>32;
|
||||
|
||||
hi=r3;
|
||||
@@ -83,11 +85,12 @@ inline boost::uint64_t umul128(
|
||||
|
||||
#endif
|
||||
|
||||
inline boost::uint64_t mulx64(boost::uint64_t x)noexcept
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline std::uint64_t mulx64(std::uint64_t x)noexcept
|
||||
{
|
||||
/* multiplier is 2^64/phi */
|
||||
boost::uint64_t hi;
|
||||
boost::uint64_t lo=umul128(x,0x9E3779B97F4A7C15ull,hi);
|
||||
std::uint64_t hi;
|
||||
std::uint64_t lo=umul128(x,0x9E3779B97F4A7C15ull,hi);
|
||||
return hi^lo;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,9 @@
|
||||
#ifndef BOOST_BLOOM_DETAIL_TYPE_TRAITS_HPP
|
||||
#define BOOST_BLOOM_DETAIL_TYPE_TRAITS_HPP
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/type_traits/make_void.hpp>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
@@ -86,6 +88,54 @@ template<typename T,class Q=void>
|
||||
using enable_if_transparent_t=
|
||||
typename std::enable_if<is_transparent<T>::value,Q>::type;
|
||||
|
||||
/* Integral-type check whose primary template simply forwards to
 * std::is_integral<T>. It is a separate trait so that it can be specialized
 * for extended integer types (this header does so for __int128 when
 * __SIZEOF_INT128__ is defined).
 */
template<typename T>
|
||||
struct is_integral_or_extended_integral:std::is_integral<T>{};
|
||||
/* Unsignedness check whose primary template simply forwards to
 * std::is_unsigned<T>. It is a separate trait so that it can be specialized
 * for extended integer types (this header does so for unsigned __int128 when
 * __SIZEOF_INT128__ is defined).
 */
template<typename T>
|
||||
struct is_unsigned_or_extended_unsigned:std::is_unsigned<T>{};
|
||||
|
||||
/* When __SIZEOF_INT128__ is defined, mark __int128 and unsigned __int128 as
 * integral, and unsigned __int128 as unsigned, through explicit
 * specializations of the two traits.
 */
#if defined(__SIZEOF_INT128__)
|
||||
|
||||
#if defined(BOOST_GCC)
|
||||
#pragma GCC diagnostic push
|
||||
/* NOTE(review): presumably naming __int128 triggers -Wpedantic diagnostics
 * on GCC, hence the suppression around the specializations -- confirm.
 */
#pragma GCC diagnostic ignored "-Wpedantic"
|
||||
#endif
|
||||
|
||||
template<>
|
||||
struct is_integral_or_extended_integral<__int128>:std::true_type{};
|
||||
template<>
|
||||
struct is_integral_or_extended_integral<unsigned __int128>:std::true_type{};
|
||||
template<>
|
||||
struct is_unsigned_or_extended_unsigned<unsigned __int128>:std::true_type{};
|
||||
|
||||
#if defined(BOOST_GCC)
|
||||
/* restores the diagnostic state saved by the push above */
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* True iff T satisfies both is_integral_or_extended_integral and
 * is_unsigned_or_extended_unsigned.
 */
template<typename T>
|
||||
struct is_unsigned_integral_or_extended_unsigned_integral:
|
||||
std::integral_constant<
|
||||
bool,
|
||||
is_integral_or_extended_integral<T>::value&&
|
||||
is_unsigned_or_extended_unsigned<T>::value
|
||||
>
|
||||
{};
|
||||
|
||||
/* is_array_of<T,Trait> is false unless T is a bounded array U[N], in which
 * case it derives from Trait<U>, i.e. it reports whether the element type
 * satisfies Trait.
 */
template<typename T,template <typename...> class Trait>
|
||||
struct is_array_of:std::false_type{};
|
||||
|
||||
/* bounded array case: defer to Trait applied to the element type */
template<typename T,std::size_t N,template <typename...> class Trait>
|
||||
struct is_array_of<T[N],Trait>:Trait<T>{};
|
||||
|
||||
/* array_size<T>::value is N when T is a bounded array U[N], 0 for any other
 * type.
 */
template<typename T> struct array_size:
|
||||
std::integral_constant<std::size_t,0>{};
|
||||
template<typename T,std::size_t N> struct array_size<T[N]>:
|
||||
std::integral_constant<std::size_t,N>{};
|
||||
|
||||
/* is_power_of_two<N>::value is true iff N is nonzero and has exactly one bit
 * set: N&(N-1) clears the lowest set bit of N, so it is zero only when no
 * other bit remains.
 */
template<std::size_t N>
|
||||
struct is_power_of_two:std::integral_constant<bool,(N!=0)&&((N&(N-1))==0)>{};
|
||||
|
||||
} /* namespace detail */
|
||||
} /* namespace bloom */
|
||||
} /* namespace boost */
|
||||
|
||||
@@ -21,14 +21,14 @@
|
||||
#include <boost/bloom/detail/fast_multiblock32_neon.hpp>
|
||||
#else /* fallback */
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<std::size_t K>
|
||||
using fast_multiblock32=multiblock<boost::uint32_t,K>;
|
||||
using fast_multiblock32=multiblock<std::uint32_t,K>;
|
||||
|
||||
} /* namespace bloom */
|
||||
} /* namespace boost */
|
||||
|
||||
@@ -15,14 +15,14 @@
|
||||
#include <boost/bloom/detail/fast_multiblock64_avx2.hpp>
|
||||
#else /* fallback */
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
/* Fallback branch: fast_multiblock64<K> is a plain alias of multiblock over a
 * 64-bit unsigned block type with the same K.
 * NOTE(review): two alias lines appear (boost::uint64_t and std::uint64_t);
 * presumably the pre-change and post-change lines of a diff rendering,
 * TODO confirm which one is live.
 */
template<std::size_t K>
|
||||
using fast_multiblock64=multiblock<boost::uint64_t,K>;
|
||||
using fast_multiblock64=multiblock<std::uint64_t,K>;
|
||||
|
||||
} /* namespace bloom */
|
||||
} /* namespace boost */
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
#include <boost/bloom/detail/type_traits.hpp>
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
#include <boost/container_hash/hash_is_avalanching.hpp>
|
||||
#include <boost/core/allocator_traits.hpp>
|
||||
#include <boost/core/empty_value.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/unordered/hash_traits.hpp> // TODO: internalize?
|
||||
#include <cstdint>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
@@ -37,57 +37,29 @@ namespace detail{
|
||||
* filter mixes hash results with mulx64 if the hash is not marked as
|
||||
* avalanching, i.e. it's not of good quality (see
|
||||
* <boost/unordered/hash_traits.hpp>), or if std::size_t is less than 64 bits
|
||||
* (mixing policies promote to boost::uint64_t).
|
||||
* (mixing policies promote to std::uint64_t).
|
||||
*/
|
||||
|
||||
/* Mixing policy that performs no mixing: mix(h,x) calls the hash function
 * object h on x and converts the result to a 64-bit unsigned integer.
 * NOTE(review): the signature and the return statement each appear twice
 * (boost::uint64_t and std::uint64_t); presumably the old and new lines of a
 * diff rendering, TODO confirm which one is live.
 */
struct no_mix_policy
|
||||
{
|
||||
template<typename Hash,typename T>
|
||||
static inline boost::uint64_t mix(const Hash& h,const T& x)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline std::uint64_t mix(const Hash& h,const T& x)
|
||||
{
|
||||
return (boost::uint64_t)h(x);
|
||||
return (std::uint64_t)h(x);
|
||||
}
|
||||
};
|
||||
|
||||
/* Mixing policy that post-processes the hash: mix(h,x) converts h(x) to a
 * 64-bit unsigned integer and passes it through mulx64 (defined outside this
 * span).
 * NOTE(review): the signature and the return statement each appear twice
 * (boost::uint64_t and std::uint64_t); presumably the old and new lines of a
 * diff rendering, TODO confirm which one is live.
 */
struct mulx64_mix_policy
|
||||
{
|
||||
template<typename Hash,typename T>
|
||||
static inline boost::uint64_t mix(const Hash& h,const T& x)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline std::uint64_t mix(const Hash& h,const T& x)
|
||||
{
|
||||
return mulx64((boost::uint64_t)h(x));
|
||||
return mulx64((std::uint64_t)h(x));
|
||||
}
|
||||
};
|
||||
|
||||
/* Scoped holder for one T whose lifetime is managed through an allocator:
 * the constructor builds the T with allocator_construct, the destructor
 * tears it down with allocator_destroy, and value() gives read-only access.
 */
template<typename Allocator,typename T>
|
||||
class allocator_constructed
|
||||
{
|
||||
public:
|
||||
/* Stores a copy of al_ and constructs the held T in place from args. */
template<typename...Args>
|
||||
allocator_constructed(const Allocator& al_,Args&&... args):al{al_}
|
||||
{
|
||||
allocator_construct(al,std::addressof(u.x),std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
/* Destroys the held T through the stored allocator copy. */
~allocator_constructed()
|
||||
{
|
||||
allocator_destroy(al,std::addressof(u.x));
|
||||
}
|
||||
|
||||
const T& value()const noexcept{return u.x;}
|
||||
|
||||
private:
|
||||
/* Union wrapper with empty constructor and destructor, so that x is neither
 * constructed nor destroyed implicitly; the enclosing class does both.
 */
union uninitialized_value
|
||||
{
|
||||
uninitialized_value(){}
|
||||
~uninitialized_value(){}
|
||||
|
||||
T x;
|
||||
};
|
||||
|
||||
uninitialized_value u;
|
||||
Allocator al;
|
||||
};
|
||||
|
||||
} /* namespace detail */
|
||||
|
||||
#if defined(BOOST_MSVC)
|
||||
@@ -97,8 +69,8 @@ private:
|
||||
|
||||
template<
|
||||
typename T,std::size_t K,
|
||||
typename Subfilter=block<unsigned char,1>,std::size_t BucketSize=0,
|
||||
typename Hash=boost::hash<T>,typename Allocator=std::allocator<T>
|
||||
typename Subfilter=block<unsigned char,1>,std::size_t Stride=0,
|
||||
typename Hash=boost::hash<T>,typename Allocator=std::allocator<unsigned char>
|
||||
>
|
||||
class
|
||||
|
||||
@@ -108,20 +80,18 @@ __declspec(empty_bases) /* activate EBO with multiple inheritance */
|
||||
|
||||
filter:
|
||||
detail::filter_core<
|
||||
K,Subfilter,BucketSize,allocator_rebind_t<Allocator,unsigned char>
|
||||
K,Subfilter,Stride,allocator_rebind_t<Allocator,unsigned char>
|
||||
>,
|
||||
empty_value<Hash,0>
|
||||
{
|
||||
BOOST_BLOOM_STATIC_ASSERT_IS_CV_UNQUALIFIED_OBJECT(T);
|
||||
static_assert(
|
||||
std::is_same<T,allocator_value_type_t<Allocator>>::value,
|
||||
"Allocator's value_type must be T");
|
||||
using super=detail::filter_core<
|
||||
K,Subfilter,BucketSize,allocator_rebind_t<Allocator,unsigned char>
|
||||
>;
|
||||
std::is_same<unsigned char,allocator_value_type_t<Allocator>>::value,
|
||||
"Allocator's value_type must be unsigned char");
|
||||
using super=detail::filter_core<K,Subfilter,Stride,Allocator>;
|
||||
using mix_policy=typename std::conditional<
|
||||
unordered::hash_is_avalanching<Hash>::value&&
|
||||
sizeof(std::size_t)>=sizeof(boost::uint64_t),
|
||||
boost::hash_is_avalanching<Hash>::value&&
|
||||
sizeof(std::size_t)>=sizeof(std::uint64_t),
|
||||
detail::no_mix_policy,
|
||||
detail::mulx64_mix_policy
|
||||
>::type;
|
||||
@@ -130,7 +100,7 @@ public:
|
||||
using value_type=T;
|
||||
using super::k;
|
||||
using subfilter=typename super::subfilter;
|
||||
using super::bucket_size;
|
||||
using super::stride;
|
||||
using hasher=Hash;
|
||||
using allocator_type=Allocator;
|
||||
using size_type=typename super::size_type;
|
||||
@@ -258,23 +228,6 @@ public:
|
||||
using super::fpr_for;
|
||||
using super::array;
|
||||
|
||||
/* Builds a temporary value_type from args through the filter allocator
 * (detail::allocator_constructed with get_allocator()) and passes a const
 * reference to it to insert. The temporary is destroyed at the end of the
 * full expression.
 */
template<typename... Args>
|
||||
BOOST_FORCEINLINE void emplace(Args&&... args)
|
||||
{
|
||||
insert(detail::allocator_constructed<allocator_type,value_type>{
|
||||
get_allocator(),std::forward<Args>(args)...}.value());
|
||||
}
|
||||
|
||||
/* Overload enabled only when U, with cv and reference qualifiers removed, is
 * exactly T: the argument already is a value, so it is forwarded to insert
 * directly instead of being used to construct another value_type.
 */
template<
|
||||
typename U,
|
||||
typename std::enable_if<
|
||||
std::is_same<T,detail::remove_cvref_t<U>>::value>::type* =nullptr
|
||||
>
|
||||
BOOST_FORCEINLINE void emplace(U&& x)
|
||||
{
|
||||
insert(x); /* avoid value_type construction */
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void insert(const T& x)
|
||||
{
|
||||
super::insert(hash_for(x));
|
||||
@@ -292,7 +245,7 @@ public:
|
||||
/* Range insertion: walks [first,last) and hands each dereferenced element to
 * the single-element entry point, post-incrementing the iterator each step.
 * NOTE(review): two loop lines appear, one calling emplace and one calling
 * insert; presumably the old and new lines of a diff rendering, TODO confirm
 * which one is live.
 */
template<typename InputIterator>
|
||||
void insert(InputIterator first,InputIterator last)
|
||||
{
|
||||
while(first!=last)emplace(*first++);
|
||||
while(first!=last)insert(*first++);
|
||||
}
|
||||
|
||||
void insert(std::initializer_list<value_type> il)
|
||||
@@ -346,10 +299,10 @@ public:
|
||||
|
||||
private:
|
||||
/* Friend declaration of the namespace-scope operator== template for filter,
 * so that the operator can reach the private alias super.
 * NOTE(review): the template parameter list and the function parameter list
 * each appear twice (S,B naming and SF,S naming); presumably the old and new
 * lines of a diff rendering, TODO confirm which one is live.
 */
template<
|
||||
typename T1,std::size_t K1,typename S,std::size_t B,typename H,typename A
|
||||
typename T1,std::size_t K1,typename SF,std::size_t S,typename H,typename A
|
||||
>
|
||||
bool friend operator==(
|
||||
const filter<T1,K1,S,B,H,A>& x,const filter<T1,K1,S,B,H,A>& y);
|
||||
const filter<T1,K1,SF,S,H,A>& x,const filter<T1,K1,SF,S,H,A>& y);
|
||||
|
||||
using hash_base=empty_value<Hash,0>;
|
||||
|
||||
@@ -357,33 +310,34 @@ private:
|
||||
Hash& h(){return hash_base::get();}
|
||||
|
||||
/* Returns the 64-bit hash used for x: delegates to mix_policy::mix with the
 * hash function object returned by h().
 * NOTE(review): two signatures appear (boost::uint64_t and std::uint64_t
 * return types); presumably the old and new lines of a diff rendering,
 * TODO confirm which one is live.
 */
template<typename U>
|
||||
inline boost::uint64_t hash_for(const U& x)const
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
inline std::uint64_t hash_for(const U& x)const
|
||||
{
|
||||
return mix_policy::mix(h(),x);
|
||||
}
|
||||
};
|
||||
|
||||
/* Equality of two filters of the same type: both operands are viewed as
 * their filter_core base (the alias super) and compared with the base
 * operator==. The hash function object takes no part in this comparison.
 * NOTE(review): the parameter lists and the super alias each appear twice
 * (S,B naming and SF,S naming); presumably the old and new lines of a diff
 * rendering, TODO confirm which one is live.
 */
template<
|
||||
typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
|
||||
typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
|
||||
>
|
||||
bool operator==(const filter<T,K,S,B,H,A>& x,const filter<T,K,S,B,H,A>& y)
|
||||
bool operator==(const filter<T,K,SF,S,H,A>& x,const filter<T,K,SF,S,H,A>& y)
|
||||
{
|
||||
using super=typename filter<T,K,S,B,H,A>::super;
|
||||
using super=typename filter<T,K,SF,S,H,A>::super;
|
||||
return static_cast<const super&>(x)==static_cast<const super&>(y);
|
||||
}
|
||||
|
||||
/* Inequality of two filters of the same type, defined as the negation of
 * operator==.
 * NOTE(review): the parameter lists appear twice (S,B naming and SF,S
 * naming); presumably the old and new lines of a diff rendering,
 * TODO confirm which one is live.
 */
template<
|
||||
typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
|
||||
typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
|
||||
>
|
||||
bool operator!=(const filter<T,K,S,B,H,A>& x,const filter<T,K,S,B,H,A>& y)
|
||||
bool operator!=(const filter<T,K,SF,S,H,A>& x,const filter<T,K,SF,S,H,A>& y)
|
||||
{
|
||||
return !(x==y);
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
|
||||
typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
|
||||
>
|
||||
void swap(filter<T,K,S,B,H,A>& x,filter<T,K,S,B,H,A>& y)
|
||||
void swap(filter<T,K,SF,S,H,A>& x,filter<T,K,SF,S,H,A>& y)
|
||||
noexcept(noexcept(x.swap(y)))
|
||||
{
|
||||
x.swap(y);
|
||||
|
||||
@@ -10,31 +10,35 @@
|
||||
#define BOOST_BLOOM_MULTIBLOCK_HPP
|
||||
|
||||
#include <boost/bloom/detail/block_base.hpp>
|
||||
#include <boost/bloom/detail/block_ops.hpp>
|
||||
#include <boost/bloom/detail/multiblock_fpr_base.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block,std::size_t K>
|
||||
struct multiblock:
|
||||
private detail::block_base<Block,K>,public detail::multiblock_fpr_base<K>
|
||||
public detail::multiblock_fpr_base<K>,
|
||||
private detail::block_base<Block,K>
|
||||
{
|
||||
static constexpr std::size_t k=K;
|
||||
using value_type=Block[k];
|
||||
|
||||
/* Marks hash in x: loop (inherited from block_base) invokes the callback
 * once per derived value h, and each call sets bit position h&mask in the
 * next element of x, advancing the index i.
 * NOTE(review): the signature and the loop line each appear twice. One loop
 * line ORs Block(1)<<(h&mask) into x[i] directly, the other delegates to
 * block_ops::set, whose definition is not shown here. Presumably these are
 * the old and new lines of a diff rendering, TODO confirm which one is live.
 */
static inline void mark(value_type& x,boost::uint64_t hash)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline void mark(value_type& x,std::uint64_t hash)
|
||||
{
|
||||
std::size_t i=0;
|
||||
loop(hash,[&](boost::uint64_t h){x[i++]|=Block(1)<<(h&mask);});
|
||||
loop(hash,[&](std::uint64_t h){block_ops::set(x[i++],h&mask);});
|
||||
}
|
||||
|
||||
/* Tests hash against x: starts from res=1 and, for every value h that loop
 * produces, folds the bit at position h&mask of the next element of x into
 * res. The final res is returned converted to bool.
 * NOTE(review): the signature, the res declaration and the loop line each
 * appear twice. One variant keeps res as a Block and ANDs in the shifted
 * element, the other keeps res as an int and delegates to block_ops::reduce,
 * whose definition is not shown here. Presumably these are the old and new
 * lines of a diff rendering, TODO confirm which one is live.
 */
static inline bool check(const value_type& x,boost::uint64_t hash)
|
||||
/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
|
||||
static inline bool check(const value_type& x,std::uint64_t hash)
|
||||
{
|
||||
Block res=1;
|
||||
int res=1;
|
||||
std::size_t i=0;
|
||||
loop(hash,[&](boost::uint64_t h){res&=(x[i++]>>(h&mask));});
|
||||
loop(hash,[&](std::uint64_t h){block_ops::reduce(res,x[i++],h&mask);});
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -42,6 +46,7 @@ private:
|
||||
using super=detail::block_base<Block,K>;
|
||||
using super::mask;
|
||||
using super::loop;
|
||||
using block_ops=detail::block_ops<Block>;
|
||||
};
|
||||
|
||||
} /* namespace bloom */
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<meta http-equiv="refresh" content="0; URL=doc/index.html">
|
||||
<meta http-equiv="refresh" content="0; URL=doc/html/bloom.html">
|
||||
<title>Boost.Bloom Documentation</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
Automatic redirection failed, please go to
|
||||
<a href="doc/index.html">doc/index.html</a>
|
||||
<a href="doc/html/bloom.html">doc/html/bloom.html</a>
|
||||
</body>
|
||||
</html>
|
||||
13
test/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
# Copyright 2018, 2019, 2021, 2022 Peter Dimov
|
||||
# Copyright 2025 Joaquin M Lopez Muñoz
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
# Try to load the BoostTestJamfile module. OPTIONAL keeps a missing module
# from being an error, and HAVE_BOOST_TEST receives the result of the include.
include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST)
|
||||
|
||||
# Register the tests only when the module was found.
if(HAVE_BOOST_TEST)
|
||||
|
||||
# Take the test list from Jamfile.v2 and link each test against the
# listed Boost targets.
boost_test_jamfile(FILE Jamfile.v2
|
||||
LINK_LIBRARIES Boost::bloom Boost::core Boost::mp11)
|
||||
|
||||
endif()
|
||||
@@ -18,12 +18,11 @@ project
|
||||
<toolset>msvc:<cxxflags>-D_SCL_SECURE_NO_WARNINGS
|
||||
;
|
||||
|
||||
# Test registrations: each listed source file is built and executed with the
# run rule.
# NOTE(review): two forms appear here, a test-suite named bloom wrapping
# bracketed run invocations and a flat list of run statements that also
# includes test_boost_bloom_hpp.cpp. Presumably these are the old and new
# sides of a diff rendering, TODO confirm which one is live.
test-suite "bloom" :
|
||||
[ run test_array.cpp ]
|
||||
[ run test_capacity.cpp ]
|
||||
[ run test_combination.cpp ]
|
||||
[ run test_comparison.cpp ]
|
||||
[ run test_construction.cpp ]
|
||||
[ run test_fpr.cpp ]
|
||||
[ run test_insertion.cpp ]
|
||||
;
|
||||
run test_array.cpp ;
|
||||
run test_boost_bloom_hpp.cpp ;
|
||||
run test_capacity.cpp ;
|
||||
run test_combination.cpp ;
|
||||
run test_comparison.cpp ;
|
||||
run test_construction.cpp ;
|
||||
run test_fpr.cpp ;
|
||||
run test_insertion.cpp ;
|
||||
25
test/test_boost_bloom_hpp.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
/* Copyright 2025 Joaquin M Lopez Munoz.
|
||||
* Distributed under the Boost Software License, Version 1.0.
|
||||
* (See accompanying file LICENSE_1_0.txt or copy at
|
||||
* http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
* See https://www.boost.org/libs/bloom for library home page.
|
||||
*/
|
||||
|
||||
#include <boost/bloom.hpp>
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
|
||||
/* Names one specialization of each of filter, block, multiblock,
 * fast_multiblock32 and fast_multiblock64. The only library header this file
 * includes is boost/bloom.hpp, so the aliases compile only if that header
 * declares all five templates.
 */
struct use_types
|
||||
{
|
||||
using type1=boost::bloom::filter<int,1>;
|
||||
using type2=boost::bloom::block<unsigned char,1>;
|
||||
using type3=boost::bloom::multiblock<unsigned char,1>;
|
||||
using type4=boost::bloom::fast_multiblock32<1>;
|
||||
using type5=boost::bloom::fast_multiblock64<1>;
|
||||
};
|
||||
|
||||
/* Test entry point: creates and discards a use_types temporary so the struct
 * is referenced, then returns the value of boost::report_errors().
 */
int main()
|
||||
{
|
||||
(void)use_types{};
|
||||
return boost::report_errors();
|
||||
}
|
||||
@@ -43,9 +43,7 @@ struct counting_allocator
|
||||
template<typename Filter,typename ValueFactory>
|
||||
void test_capacity()
|
||||
{
|
||||
using filter=realloc_filter<
|
||||
Filter,counting_allocator<typename Filter::value_type>
|
||||
>;
|
||||
using filter=realloc_filter<Filter,counting_allocator<unsigned char>>;
|
||||
|
||||
ValueFactory fac;
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ void test_pocxx()
|
||||
static constexpr auto always_equal=AlwaysEqual::value;
|
||||
using filter=realloc_filter<
|
||||
rehash_filter<Filter,stateful<typename Filter::hasher>>,
|
||||
stateful_allocator<typename Filter::value_type,Propagate,AlwaysEqual>
|
||||
stateful_allocator<unsigned char,Propagate,AlwaysEqual>
|
||||
>;
|
||||
using value_type=typename filter::value_type;
|
||||
using hasher=typename filter::hasher;
|
||||
@@ -170,7 +170,7 @@ void test_construction()
|
||||
{
|
||||
using filter=realloc_filter<
|
||||
rehash_filter<Filter,stateful<typename Filter::hasher>>,
|
||||
stateful_allocator<typename Filter::value_type>
|
||||
stateful_allocator<unsigned char>
|
||||
>;
|
||||
using value_type=typename filter::value_type;
|
||||
using hasher=typename filter::hasher;
|
||||
@@ -443,65 +443,6 @@ void test_construction()
|
||||
}
|
||||
}
|
||||
|
||||
/* Test value type with deleted default constructor and deleted destructor,
 * so ordinary code can neither default-construct nor destroy it. Its only
 * state is the int member n.
 */
struct allocator_only_constructible
|
||||
{
|
||||
allocator_only_constructible()=delete;
|
||||
~allocator_only_constructible()=delete;
|
||||
|
||||
int n;
|
||||
};
|
||||
|
||||
/* Hash function object for allocator_only_constructible that also accepts a
 * plain int. Both overloads yield boost::hash<int> of the integer, so an
 * object and its n hash identically.
 */
struct allocator_only_constructible_hash
|
||||
{
|
||||
/* Marks the hasher as transparent (callable with more than one type). */
using is_transparent=void;
|
||||
|
||||
/* Hashes an object by forwarding its n to the int overload. */
std::size_t operator()(const allocator_only_constructible& x)const
|
||||
{
|
||||
return (*this)(x.n);
|
||||
}
|
||||
|
||||
std::size_t operator()(int n)const
|
||||
{
|
||||
return boost::hash<int>{}(n);
|
||||
}
|
||||
};
|
||||
|
||||
/* Test allocator that supplies its own construct and destroy members for
 * allocator_only_constructible, a type with deleted constructor and
 * destructor. All instances compare equal.
 */
template<typename T>
|
||||
struct constructing_allocator
|
||||
{
|
||||
using value_type=T;
|
||||
|
||||
constructing_allocator()=default;
|
||||
/* Converting constructor from an allocator of another value type. */
template<typename U>
|
||||
constructing_allocator(const constructing_allocator<U>&){}
|
||||
|
||||
/* Obtains raw storage for n objects of T from global operator new. */
T* allocate(std::size_t n)
|
||||
{
|
||||
return static_cast<T*>(::operator new(n*sizeof(T)));
|
||||
}
|
||||
|
||||
void deallocate(T* p,std::size_t){::operator delete(p);}
|
||||
|
||||
/* construct only assigns the member n and runs no constructor; destroy is a
 * no-op and runs no destructor.
 */
void construct(allocator_only_constructible* p,int n){p->n=n;}
|
||||
void destroy(allocator_only_constructible* p){}
|
||||
|
||||
bool operator==(const constructing_allocator& x)const{return true;}
|
||||
bool operator!=(const constructing_allocator& x)const{return false;}
|
||||
};
|
||||
|
||||
/* Checks that filter::emplace builds its element through the allocator: the
 * value type cannot be constructed or destroyed directly, so emplace(42) can
 * only work via constructing_allocator. The test then expects
 * may_contain(42) to be true.
 */
void test_allocator_aware_construction()
|
||||
{
|
||||
using value_type=allocator_only_constructible;
|
||||
using filter=boost::bloom::filter<
|
||||
value_type,5,boost::bloom::block<unsigned char,1>,0,
|
||||
allocator_only_constructible_hash,constructing_allocator<value_type>
|
||||
>;
|
||||
|
||||
filter f(1000);
|
||||
f.emplace(42);
|
||||
BOOST_TEST(f.may_contain(42));
|
||||
}
|
||||
|
||||
struct lambda
|
||||
{
|
||||
template<typename T>
|
||||
@@ -517,6 +458,5 @@ struct lambda
|
||||
/* Test entry point: applies lambda to every type in identity_test_types via
 * mp_for_each, then returns the value of boost::report_errors().
 * NOTE(review): the call to test_allocator_aware_construction sits in a hunk
 * that shrinks by one line, so it is presumably the removed side of the
 * diff, TODO confirm.
 */
int main()
|
||||
{
|
||||
boost::mp11::mp_for_each<identity_test_types>(lambda{});
|
||||
test_allocator_aware_construction();
|
||||
return boost::report_errors();
|
||||
}
|
||||
|
||||
@@ -55,10 +55,7 @@ void test_fpr()
|
||||
{
|
||||
using filter=rehash_filter<
|
||||
revalue_filter<
|
||||
realloc_filter<
|
||||
Filter,
|
||||
throwing_allocator<typename Filter::value_type>
|
||||
>,
|
||||
realloc_filter<Filter,throwing_allocator<unsigned char>>,
|
||||
std::string
|
||||
>,
|
||||
boost::hash<std::string>
|
||||
|
||||
@@ -50,21 +50,6 @@ void test_insertion()
|
||||
filter f(10000);
|
||||
ValueFactory fac;
|
||||
|
||||
{
|
||||
auto x=fac();
|
||||
f.emplace(x,0,"hello",3.1416);
|
||||
BOOST_TEST(f.may_contain(value_type{x,1}));
|
||||
}
|
||||
{
|
||||
auto x=fac();
|
||||
f.emplace(value_type{x,0,"boost"}); /* must avoid value_type move ctor */
|
||||
BOOST_TEST(f.may_contain(value_type{x,1}));
|
||||
}
|
||||
{
|
||||
value_type x{fac(),0,"boost"};
|
||||
f.emplace(x); /* same with copy ctor */
|
||||
BOOST_TEST(f.may_contain(x));
|
||||
}
|
||||
{
|
||||
value_type x{fac(),0};
|
||||
f.insert(const_cast<value_type&>(x));
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
#include <boost/bloom/fast_multiblock64.hpp>
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/bloom/multiblock.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
#include <boost/mp11/algorithm.hpp>
|
||||
#include <boost/mp11/list.hpp>
|
||||
#include <boost/mp11/utility.hpp>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
using test_types=boost::mp11::mp_list<
|
||||
@@ -25,10 +25,16 @@ using test_types=boost::mp11::mp_list<
|
||||
int,2
|
||||
>,
|
||||
boost::bloom::filter<
|
||||
std::string,1,boost::bloom::block<boost::uint16_t,3>,1
|
||||
std::string,1,boost::bloom::block<std::uint16_t,3>,1
|
||||
>,
|
||||
boost::bloom::filter<
|
||||
std::size_t,1,boost::bloom::multiblock<boost::uint64_t,3>
|
||||
int,1,boost::bloom::block<std::uint32_t[4],4>
|
||||
>,
|
||||
boost::bloom::filter<
|
||||
std::size_t,1,boost::bloom::multiblock<std::uint64_t,3>
|
||||
>,
|
||||
boost::bloom::filter<
|
||||
std::size_t,1,boost::bloom::multiblock<unsigned char[4],3>,1
|
||||
>,
|
||||
boost::bloom::filter<
|
||||
unsigned char,1,boost::bloom::fast_multiblock32<5>,2
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#define BOOST_BLOOM_TEST_TEST_UTILITIES_HPP
|
||||
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <boost/core/allocator_traits.hpp>
|
||||
#include <limits>
|
||||
#include <new>
|
||||
#include <string>
|
||||
@@ -44,7 +43,7 @@ template<
|
||||
>
|
||||
struct revalue_filter_impl<boost::bloom::filter<T,K,S,B,H,A>,U>
|
||||
{
|
||||
using type=boost::bloom::filter<U,K,S,B,H,boost::allocator_rebind_t<A,U>>;
|
||||
using type=boost::bloom::filter<U,K,S,B,H,A>;
|
||||
};
|
||||
|
||||
template<typename Filter,typename U>
|
||||
|
||||