review feedback (#32)

* removed superfluous inline (Alexander Grund) * made hasher equivalence a precondition for &=/|= (Andrzej Krzemienski) * documented exception safety guarantees (Andrzej Krzemienski) * mentioned Bloom filters are called so after Burton H Bloom (Dmitry Arkhipov) * added warning about OOM for very small FPR (Ivan Matek) * stressed config chart x axis is capacity/num elements rather than plain capacity (Ivan Matek) * s/[SIMD] is available/is enabled at compile time (Ivan Matek) * shut down clang-tidy warnings (Ivan Matek) * used "set union" for more clarity (Andrzej Krzemienski) * stressed early on that boost::bloom::filter is _not_ a container (Claudio DeSouza) * added bulk operations to roadmap (Dmitry Arkhipov) * added try_insert to roadmap (Konstantin Savvidy) * added estimated_size to roadmap (Konstantin Savvidy) * added alternative filters to roadmap (Konstantin Savvidy) * used <cstdint> instead of <boost/cstdint.hpp> (Rubén Pérez) * mentioned endianness when serializing filters (Rubén Pérez) * corrected sloppiness about optimum k determination (Tomer Vromen) * added run-time specification of k to roadmap (Tomer Vromen) * added test/CMakeLists.txt (Rubén Pérez) * added CMake-based testing to GHA (Rubén Pérez) (#8) * added <boost/bloom.hpp> (Rubén Pérez) * added Codecov reporting (Rubén Pérez) (#9) * moved from boost::unordered::hash_is_avalanching to ContainerHash's boost::hash_is_avalanching (Ivan Matek/Peter Dimov) * added syntax highlighting to code snippets (Rubén Pérez) * avoided C-style casts in examples (Rubén Pérez) * added acknowledgements section (Peter Turcan) * added Getting Started section (Peter Turcan) * fixed example Jamfile and added example building to CI (Rubén Pérez) (#10) * added diagram about overlapping vs. 
non-overlapping subarrays (Rubén Pérez/Ivan Matek/Vinnie Falco) * made first code snippet self-contained (Rubén Pérez/Peter Turcan) * added more comments to genome.cpp (Rubén Pérez) * added support for arrays as blocks (Tomer Vromen) (#24) * removed emplace (Seth Heeren/Peter Dimov) (#25) * required the allocator to be of unsigned char (Seth Heeren/Peter Dimov) (#26) * added compile-time validation of Block types (Rubén Pérez) (#27) * added value type to displayed filter names in tables (Tomer Vromen) (#28) * used -march=native rather than -mavx2 (Ivan Matek) * adopted hash strategy with fastrange plus a separate MCG (Kostas Savvidis/Peter Dimov) (#30) * several maintenance commits
2026-01-19 04:02:11 +00:00 · 2025-06-24 23:27:54 +02:00
parent 0ec3f1813b
commit 2592193066
64 changed files with 2369 additions and 1329 deletions
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -0,0 +1,23 @@
+# Copyright 2019 - 2021 Alexander Grund
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt)
+#
+# Sample codecov configuration file. Edit as required
+
+codecov:
+  max_report_age: off
+  require_ci_to_pass: yes
+  notify:
+    # Increase this if you have multiple coverage collection jobs
+    after_n_builds: 1
+    wait_for_ci: yes
+
+# Change how pull request comments look
+comment:
+  layout: "reach,diff,flags,files,footer"
+
+# Ignore specific files or folders. Glob patterns are supported.
+# See https://docs.codecov.com/docs/ignoring-paths
+ignore:
+  - extra/**/*
+  # - test/**/*
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,6 +11,8 @@ on:

 env:
  UBSAN_OPTIONS: print_stacktrace=1
+  B2_CI_VERSION: 1
+  LCOV_BRANCH_COVERAGE: 0

 jobs:
  posix:
@@ -246,19 +248,17 @@ jobs:
          export ADDRMD=${{matrix.address-model}}
          ./b2 -j3 libs/$LIBRARY/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} variant=debug,release

+      - name: Compile examples
+        run: |
+          cd ../boost-root
+          export ADDRMD=${{matrix.address-model}}
+          ./b2 -j3 libs/$LIBRARY/example toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} ${ADDRMD:+address-model=$ADDRMD} variant=debug,release
+
  windows:
    strategy:
      fail-fast: false
      matrix:
        include:
-          - toolset: msvc-14.0
-            cxxstd: 14,latest
-            addrmd: 32,64
-            os: windows-2019
-          - toolset: msvc-14.2
-            cxxstd: "14,17,20,latest"
-            addrmd: 32,64
-            os: windows-2019
          - toolset: msvc-14.3
            cxxstd: "14,17,20,latest"
            addrmd: 32,64
@@ -270,7 +270,7 @@ jobs:
          - toolset: gcc
            cxxstd: "11,14,17,2a"
            addrmd: 64
-            os: windows-2019
+            os: windows-2022

    runs-on: ${{matrix.os}}

@@ -305,3 +305,201 @@ jobs:
        run: |
          cd ../boost-root
          b2 -j3 libs/%LIBRARY%/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release embed-manifest-via=linker
+
+      - name: Compile examples
+        shell: cmd
+        run: |
+          cd ../boost-root
+          b2 -j3 libs/%LIBRARY%/example toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release embed-manifest-via=linker
+
+  posix-cmake-test:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-22.04
+          - os: ubuntu-24.04
+          - os: macos-13
+          - os: macos-14
+          - os: macos-15
+
+    runs-on: ${{matrix.os}}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install packages
+        if: matrix.install
+        run: sudo apt-get -y install ${{matrix.install}}
+
+      - name: Setup Boost
+        run: |
+          echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+          LIBRARY=${GITHUB_REPOSITORY#*/}
+          echo LIBRARY: $LIBRARY
+          echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+          echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+          echo GITHUB_REF: $GITHUB_REF
+          REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+          REF=${REF#refs/heads/}
+          echo REF: $REF
+          BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+          echo BOOST_BRANCH: $BOOST_BRANCH
+          cd ..
+          git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+          cd boost-root
+          mkdir -p libs/$LIBRARY
+          cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+          git submodule update --init tools/boostdep
+          python tools/boostdep/depinst/depinst.py -I benchmark -I example --git_args "--jobs 3" $LIBRARY
+
+      - name: Configure
+        run: |
+          cd ../boost-root
+          mkdir __build__ && cd __build__
+          cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON ..
+
+      - name: Build tests
+        run: |
+          cd ../boost-root/__build__
+          cmake --build . --target tests
+
+      - name: Run tests
+        run: |
+          cd ../boost-root/__build__
+          ctest --output-on-failure --no-tests=error
+
+  windows-cmake-test:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: windows-2022
+
+    runs-on: ${{matrix.os}}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Boost
+        shell: cmd
+        run: |
+          echo GITHUB_REPOSITORY: %GITHUB_REPOSITORY%
+          for /f %%i in ("%GITHUB_REPOSITORY%") do set LIBRARY=%%~nxi
+          echo LIBRARY: %LIBRARY%
+          echo LIBRARY=%LIBRARY%>>%GITHUB_ENV%
+          echo GITHUB_BASE_REF: %GITHUB_BASE_REF%
+          echo GITHUB_REF: %GITHUB_REF%
+          if "%GITHUB_BASE_REF%" == "" set GITHUB_BASE_REF=%GITHUB_REF%
+          set BOOST_BRANCH=develop
+          for /f %%i in ("%GITHUB_BASE_REF%") do if "%%~nxi" == "master" set BOOST_BRANCH=master
+          echo BOOST_BRANCH: %BOOST_BRANCH%
+          cd ..
+          git clone -b %BOOST_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root
+          cd boost-root
+          rem remove when/if the library makes it into Boost
+          if not exist libs\%LIBRARY% mkdir libs\%LIBRARY%
+          xcopy /s /e /q %GITHUB_WORKSPACE% libs\%LIBRARY%\
+          git submodule update --init tools/boostdep
+          python tools/boostdep/depinst/depinst.py -I benchmark -I example --git_args "--jobs 3" %LIBRARY%
+
+      - name: Configure
+        shell: cmd
+        run: |
+          cd ../boost-root
+          mkdir __build__ && cd __build__
+          cmake -DBOOST_INCLUDE_LIBRARIES=%LIBRARY% -DBUILD_TESTING=ON ..
+
+      - name: Build tests (Debug)
+        shell: cmd
+        run: |
+          cd ../boost-root/__build__
+          cmake --build . --target tests --config Debug
+
+      - name: Run tests (Debug)
+        shell: cmd
+        run: |
+          cd ../boost-root/__build__
+          ctest --output-on-failure --no-tests=error -C Debug
+
+      - name: Build tests (Release)
+        shell: cmd
+        run: |
+          cd ../boost-root/__build__
+          cmake --build . --target tests --config Release
+
+      - name: Run tests (Release)
+        shell: cmd
+        run: |
+          cd ../boost-root/__build__
+          ctest --output-on-failure --no-tests=error -C Release
+
+  codecov:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - toolset: gcc-14
+            cxxstd: "11"
+            os: ubuntu-24.04
+            install: g++-14-multilib
+            address-model: 32,64
+
+    runs-on: ${{matrix.os}}
+    container:
+      image: ${{matrix.container}}
+      volumes:
+        - /node20217:/node20217:rw,rshared
+        - ${{ startsWith(matrix.container, 'ubuntu:1') && '/node20217:/__e/node20:ro,rshared' || ' ' }}
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Setup container environment
+        if: matrix.container
+        run: |
+          apt-get update
+          apt-get -y install sudo python3 git g++ curl xz-utils
+          if [[ "${{matrix.container}}" == "ubuntu:1"* ]]; then
+            # Node 20 doesn't work with Ubuntu 16/18  glibc: https://github.com/actions/checkout/issues/1590
+            curl -sL https://archives.boost.io/misc/node/node-v20.9.0-linux-x64-glibc-217.tar.xz | tar -xJ --strip-components 1 -C /node20217
+          fi
+
+      - uses: actions/checkout@v4
+
+      - name: Fetch Boost.CI
+        uses: actions/checkout@v4
+        with:
+          repository: boostorg/boost-ci
+          ref: master
+          path: boost-ci-cloned
+
+      - name: Install Boost.CI
+        run: |
+          cp -r boost-ci-cloned/ci .
+          rm -rf boost-ci-cloned
+
+      - name: Install packages
+        if: matrix.install
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install ${{matrix.install}}
+
+      - name: Setup Boost
+        env:
+          B2_TOOLSET: ${{matrix.toolset}}
+          B2_CXXSTD: ${{matrix.cxxstd}}
+          B2_ADDRESS_MODEL: ${{matrix.address-model}}
+        run: source ci/github/install.sh
+
+      - name: Setup coverage collection
+        run: ci/github/codecov.sh "setup"
+
+      - name: Run tests
+        run: ci/build.sh
+
+      - name: Upload coverage
+        env:
+          CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
+        run: ci/codecov.sh "upload"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,6 @@ target_link_libraries(boost_bloom
    Boost::core
    Boost::throw_exception
    Boost::type_traits
-    Boost::unordered
 )

 target_compile_features(boost_bloom INTERFACE cxx_std_11)
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Candidate Boost Bloom Library

-[![Branch](https://img.shields.io/badge/branch-master-brightgreen.svg)](https://github.com/joaquintides/bloom/tree/master) [![CI](https://github.com/joaquintides/bloom/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [![Drone status](https://img.shields.io/drone/build/joaquintides/bloom/master?server=https%3A%2F%2Fdrone.cpp.al&logo=drone&logoColor=%23CCCCCC&label=CI)](https://drone.cpp.al/joaquintides/bloom) [![Documentation](https://img.shields.io/badge/docs-master-brightgreen.svg)](https://master.bloom.cpp.al/) </br>
-[![Branch](https://img.shields.io/badge/branch-develop-brightgreen.svg)](https://github.com/joaquintides/bloom/tree/develop) [![CI](https://github.com/joaquintides/bloom/actions/workflows/ci.yml/badge.svg?branch=develop)](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [![Drone status](https://img.shields.io/drone/build/joaquintides/bloom/develop?server=https%3A%2F%2Fdrone.cpp.al&logo=drone&logoColor=%23CCCCCC&label=CI)](https://drone.cpp.al/joaquintides/bloom) [![Documentation](https://img.shields.io/badge/docs-develop-brightgreen.svg)](https://develop.bloom.cpp.al/) </br>
+[![Branch](https://img.shields.io/badge/branch-master-brightgreen.svg)](https://github.com/joaquintides/bloom/tree/master) [![CI](https://github.com/joaquintides/bloom/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [![Drone status](https://img.shields.io/drone/build/joaquintides/bloom/master?server=https%3A%2F%2Fdrone.cpp.al&logo=drone&logoColor=%23CCCCCC&label=CI)](https://drone.cpp.al/joaquintides/bloom) [![codecov](https://codecov.io/gh/joaquintides/bloom/branch/master/graph/badge.svg)](https://app.codecov.io/gh/joaquintides/bloom/tree/master) [![Documentation](https://img.shields.io/badge/docs-master-brightgreen.svg)](https://master.bloom.cpp.al/) </br>
+[![Branch](https://img.shields.io/badge/branch-develop-brightgreen.svg)](https://github.com/joaquintides/bloom/tree/develop) [![CI](https://github.com/joaquintides/bloom/actions/workflows/ci.yml/badge.svg?branch=develop)](https://github.com/joaquintides/bloom/actions/workflows/ci.yml) [![Drone status](https://img.shields.io/drone/build/joaquintides/bloom/develop?server=https%3A%2F%2Fdrone.cpp.al&logo=drone&logoColor=%23CCCCCC&label=CI)](https://drone.cpp.al/joaquintides/bloom) [![codecov](https://codecov.io/gh/joaquintides/bloom/branch/develop/graph/badge.svg)](https://app.codecov.io/gh/joaquintides/bloom/tree/develop) [![Documentation](https://img.shields.io/badge/docs-develop-brightgreen.svg)](https://develop.bloom.cpp.al/) </br>
 [![BSL 1.0](https://img.shields.io/badge/license-BSL_1.0-blue.svg)](https://www.boost.org/users/license.html) <img alt="C++11 required" src="https://img.shields.io/badge/standard-C%2b%2b11-blue.svg"> <img alt="Header-only library" src="https://img.shields.io/badge/build-header--only-blue.svg">

 (Candidate) Boost.Bloom provides the class template `boost::bloom::filter` that
--- a/benchmark/comparison_table.cpp
+++ b/benchmark/comparison_table.cpp
@@ -53,11 +53,7 @@ void resume_timing()
  measure_start+=std::chrono::high_resolution_clock::now()-measure_pause;
 }

-#include <boost/bloom/block.hpp>
-#include <boost/bloom/fast_multiblock32.hpp>
-#include <boost/bloom/fast_multiblock64.hpp>
-#include <boost/bloom/filter.hpp>
-#include <boost/bloom/multiblock.hpp>
+#include <boost/bloom.hpp>
 #include <boost/core/detail/splitmix64.hpp>
 #include <boost/mp11/algorithm.hpp>
 #include <boost/mp11/list.hpp>
@@ -212,14 +208,14 @@ using namespace boost::bloom;
 template<std::size_t K1,std::size_t K2,std::size_t K3>
 using filters1=boost::mp11::mp_list<
  filter<int,K1>,
-  filter<int,1,block<boost::uint64_t,K2>>,
-  filter<int,1,block<boost::uint64_t,K3>,1>
+  filter<int,1,block<std::uint64_t,K2>>,
+  filter<int,1,block<std::uint64_t,K3>,1>
 >;

 template<std::size_t K1,std::size_t K2,std::size_t K3>
 using filters2=boost::mp11::mp_list<
-  filter<int,1,multiblock<boost::uint64_t,K1>>,
-  filter<int,1,multiblock<boost::uint64_t,K2>,1>,
+  filter<int,1,multiblock<std::uint64_t,K1>>,
+  filter<int,1,multiblock<std::uint64_t,K2>,1>,
  filter<int,1,fast_multiblock32<K3>>
 >;

@@ -230,6 +226,13 @@ using filters3=boost::mp11::mp_list<
  filter<int,1,fast_multiblock64<K3>,1>
 >;

+template<std::size_t K1,std::size_t K2,std::size_t K3>
+using filters4=boost::mp11::mp_list<
+  filter<int,1,block<std::uint64_t[8],K1>>,
+  filter<int,1,block<std::uint64_t[8],K2>,1>,
+  filter<int,1,multiblock<std::uint64_t[8],K3>>
+>;
+
 int main(int argc,char* argv[])
 {
  if(argc<2){
@@ -275,9 +278,9 @@ int main(int argc,char* argv[])
    "<table>\n"
    "  <tr>\n"
    "    <th></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;K></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,block&lt;uint64_t,K>></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,block&lt;uint64_t,K>,1></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,K></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,block&lt;uint64_t,K>></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,block&lt;uint64_t,K>,1></code></th>\n"
    "  </tr>\n"
    "  <tr>\n"
    "    <th>c</th>\n"<<
@@ -294,9 +297,9 @@ int main(int argc,char* argv[])
  std::cout<<
    "  <tr>\n"
    "    <th></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,multiblock&lt;uint64_t,K>></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,multiblock&lt;uint64_t,K>,1></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,fast_multiblock32&lt;K>></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,multiblock&lt;uint64_t,K>></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,multiblock&lt;uint64_t,K>,1></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,fast_multiblock32&lt;K>></code></th>\n"
    "  </tr>\n"
    "  <tr>\n"
    "    <th>c</th>\n"<<
@@ -313,9 +316,9 @@ int main(int argc,char* argv[])
  std::cout<<
    "  <tr>\n"
    "    <th></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,fast_multiblock32&lt;K>,1></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,fast_multiblock64&lt;K>></code></th>\n"
-    "    <th colspan=\"5\"><code>filter&lt;1,fast_multiblock64&lt;K>,1></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,fast_multiblock32&lt;K>,1></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,fast_multiblock64&lt;K>></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,fast_multiblock64&lt;K>,1></code></th>\n"
    "  </tr>\n"
    "  <tr>\n"
    "    <th>c</th>\n"<<
@@ -329,5 +332,24 @@ int main(int argc,char* argv[])
  row<filters3<11, 11, 11>>(16);
  row<filters3<13, 13, 14>>(20);

+  std::cout<<
+    "  <tr>\n"
+    "    <th></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,block&lt;uint64_t[8],K>></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,block&lt;uint64_t[8],K>,1></code></th>\n"
+    "    <th colspan=\"5\"><code>filter&lt;int,1,multiblock&lt;uint64_t[8],K>></code></th>\n"
+    "  </tr>\n"
+    "  <tr>\n"
+    "    <th>c</th>\n"<<
+    subheader<<
+    subheader<<
+    subheader<<
+    "  </tr>\n";
+
+  row<filters4< 5,  6,  7>>( 8);
+  row<filters4< 7,  7, 10>>(12);
+  row<filters4< 9, 10, 11>>(16);
+  row<filters4<12, 12, 15>>(20);
+
  std::cout<<"</table>\n";
 }
--- a/benchmark/fpr_c.cpp
+++ b/benchmark/fpr_c.cpp
@@ -97,7 +97,7 @@ int main()

  std::cout
    <<filter_name<<"\n"
-    <<"fpr;c;k\n";
+    <<"c;fpr;k\n";

  std::size_t ik=0; /* k-1 */
  for(std::size_t c=c_min;c<=c_max;++c){
--- a/doc/bloom.adoc
+++ b/doc/bloom.adoc
@@ -26,6 +26,14 @@
 .bordered_table th, .bordered_table td {
  border: 1px solid lightgray;
 }
+
+.formula-center {
+  display: block;
+  overflow-x: auto;
+  overflow-y: hidden;
+  max-width: 100%;
+  text-align: center;
+}
 </style>
 ++++

@@ -35,7 +43,9 @@ include::bloom/tutorial.adoc[]
 include::bloom/configuration.adoc[]
 include::bloom/benchmarks.adoc[]
 include::bloom/reference.adoc[]
+include::bloom/future_work.adoc[]
 include::bloom/fpr_estimation.adoc[]
 include::bloom/implementation_notes.adoc[]
 include::bloom/release_notes.adoc[]
+include::bloom/acknowledgements.adoc[]
 include::bloom/copyright.adoc[]
--- a/doc/bloom/acknowledgements.adoc
+++ b/doc/bloom/acknowledgements.adoc
@@ -0,0 +1,31 @@
+[#acknowledgements]
+= Acknowledgements
+
+:idprefix: acknowledgements_
+
+Peter Dimov and Christian Mazakas reviewed significant portions of the code
+and documentation during the development phase. Sam Darwin provided support
+for CI setup and documentation building.
+
+The Boost acceptance review took place between the 13th and 22nd of May,
+2025. Big thanks to Arnaud Becheler for his expert management of the review. The
+following people participated in the review: 
+Dmitry Arkhipov,
+David Bien,
+Claudio DeSouza,
+Peter Dimov,
+Vinnie Falco,
+Alexander Grund,
+Seth Heeren,
+Andrzej Krzemie&nacute;ski,
+Ivan Matek,
+Christian Mazakas,
+Rub&eacute;n P&eacute;rez,
+Kostas Savvidis,
+Peter Turcan,
+Tomer Vromen. Many thanks to all of them for their very helpful feedback.
+
+Boost.Bloom was designed and written in
+https://en.wikipedia.org/wiki/C%C3%A1ceres%2c_Spain[C&aacute;ceres^] and
+https://en.wikipedia.org/wiki/Oropesa,_Spain[Oropesa^],
+January-June 2025.
--- a/doc/bloom/benchmarks.adoc
+++ b/doc/bloom/benchmarks.adoc
--- a/doc/bloom/configuration.adoc
+++ b/doc/bloom/configuration.adoc
@@ -16,7 +16,8 @@ The chart plots FPR vs. _c_ (capacity / number of elements inserted) for several
 as shown in the table below.

 +++
-<table class="bordered_table" style="text-align: center;">
+<div style="overflow-x: auto;">
+<table class="bordered_table" style="text-align: center; font-size: 85%;">
    <tr>
        <th rowspan="2"></th>
 		<th colspan="21"><i>c</i> = capacity / number of elements inserted</th>
@@ -26,69 +27,81 @@ as shown in the table below.
        <th>14</th> <th>15</th> <th>16</th> <th>17</th> <th>18</th> <th>19</th> <th>20</th> <th>21</th> <th>22</th> <th>23</th> <th>24</th>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
        <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint32_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint32_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
        <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint64_t,K&gt;&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint64_t,K&gt;&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
        <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint64_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint64_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
        <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>9</td> <td>9</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,multiblock&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
        <td>9</td> <td>9</td> <td>9</td> <td>10</td> <td>13</td> <td>13</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>16</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint32_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint64_t[8],K&gt;&gt;</code></td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td>
+		<td>8</td> <td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>11</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;T,1,multiblock&lt;uint32_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
        <td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>12</td> <td>12</td> <td>14</td> <td>14</td> <td>14</td> <td>14</td> <td>15</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint64_t,K&gt;&gt;</code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,block&lt;uint64_t[8],K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td>
+		<td>8</td> <td>8</td> <td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>12</td> <td>13</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;T,1,multiblock&lt;uint64_t,K&gt;&gt;</code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
        <td>10</td> <td>10</td> <td>12</td> <td>13</td> <td>14</td> <td>15</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>17</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint64_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
+        <td style="text-align: left;"><code>filter&lt;T,1,multiblock&lt;uint64_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
        <td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>15</td> <td>16</td> <td>16</td>
    </tr>
    <tr>
-        <td style="text-align: left;"><code>filter&lt;K&gt;</code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
+        <td style="text-align: left;"><code>filter&lt;T,K&gt;</code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
        <td>10</td> <td>11</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>14</td> <td>16</td> <td>16</td> <td>16</td> <td>17</td>
    </tr>
 </table>
+</div>
 +++

 Let's see how this can be used by way of an example. Suppose we plan to insert 10M elements
-and want to keep the FPR at 10^&minus;4^. The chart gives us five possibilities:
+and want to keep the FPR at 10^&minus;4^. The chart gives us seven different
+values of _c_ (the array capacity divided by the number of elements, in our case 10M):

-* `filter<K>` -> _c_ &cong; 19 bits per element
-* `filter<1, multiblock<uint64_t, K>, 1>` -> _c_ &cong; 20 bits per element
-* `filter<1, multiblock<uint64_t, K>>` -> _c_ &cong; 21 bits per element
-* `filter<1, multiblock<uint32_t, K>, 1>` -> _c_ &cong; 21.5 bits per element
-* `filter<1, multiblock<uint32_t, K>>` -> _c_ &cong; 23 bits per element
+* `filter<T, K>` -> _c_ &cong; 19 bits per element
+* `filter<T, 1, multiblock<uint64_t, K>, 1>` -> _c_ &cong; 20 bits per element
+* `filter<T, 1, multiblock<uint64_t, K>>` -> _c_ &cong; 21 bits per element
+* `filter<T, 1, block<uint64_t[8], K>, 1>` -> _c_ &cong; 21 bits per element
+* `filter<T, 1, multiblock<uint32_t, K>, 1>` -> _c_ &cong; 21.5 bits per element
+* `filter<T, 1, block<uint64_t[8], K>>` -> _c_ &cong; 22 bits per element
+* `filter<T, 1, multiblock<uint32_t, K>>` -> _c_ &cong; 23 bits per element

 These options have different tradeoffs in terms of space used and performance. If
-we choose `filter<1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
-`filter<1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
+we choose `filter<T, 1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
+`filter<T, 1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
 value of `K` in the table for _c_ = 21 or 22, and we get our final configuration:

-[listing,subs="+macros,+quotes"]
+[source,subs="+macros,+quotes"]
 -----
 using my_filter=filter<std::string, 1, fast_multiblock32<**14**>, 1>;
 -----

 The resulting filter can be constructed in any of the following ways:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 // 1) calculate the capacity from the value of c we got from the chart
-my_filter pass:[f((]std::size_t)(10'000'000 * 21.5));
+my_filter f((std::size_t)(10'000'000 * 21.5));

 // 2) let the library calculate the capacity from n and target fpr
 // expect some deviation from the capacity in 1)
--- a/doc/bloom/fpr_estimation.adoc
+++ b/doc/bloom/fpr_estimation.adoc
@@ -6,57 +6,62 @@
 For a classical Bloom filter, the theoretical false positive rate, under some simplifying assumptions,
 is given by

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}(n,m,k)=\left(1 - \left(1 - \displaystyle\frac{1}{m}\right)^{kn}\right)^k \approx \left(1 - e^{-kn/m}\right)^k]{small-end} for large {small}stem:[m]{small-end},

 where {small}stem:[n]{small-end} is the number of elements inserted in the filter, {small}stem:[m]{small-end} its capacity in bits and {small}stem:[k]{small-end} the
 number of bits set per insertion (see a https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives[derivation^]
-of this formula). For a given inverse load factor {small}stem:[c=m/n]{small-end}, the optimum {small}stem:[k]{small-end} is
-the integer closest to:
+of this formula). For a fixed inverse load factor {small}stem:[c=m/n]{small-end}, 
+the expression reaches at

-[.text-center]
-{small}stem:[k_{\text{opt}}=c\cdot\ln2,]{small-end}
+[.formula-center]
+{small}stem:[k_{\text{opt}}=c\cdot\ln2]{small-end}

-yielding a minimum attainable FPR of {small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
+its minimum value
+{small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
+The optimum {small}stem:[k]{small-end}, which must be an integer,
+is either
+{small}stem:[\lfloor k_{\text{opt}}\rfloor]{small-end} or
+{small}stem:[\lceil k_{\text{opt}}\rceil]{small-end}.

 In the case of a filter of the form `boost::bloom::filter<T, K, block<Block, K'>>`, we can extend
 the approach from https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f376ff09a64b388bfcde2f5353e9ddb44033aac8[Putze et al.^]
 to derive the (approximate but very precise) formula:

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbk/m) \cdot \text{FPR}(i,b,k')\right)^{k},]{small-end}

 where

-[.text-center]
+[.formula-center]
 {small}stem:[\text{Pois}(i,\lambda)=\displaystyle\frac{\lambda^i e^{-\lambda}}{i!}]{small-end}

 is the probability mass function of a https://en.wikipedia.org/wiki/Poisson_distribution[Poisson distribution^]
 with mean {small}stem:[\lambda]{small-end}, and {small}stem:[b]{small-end} is the size of `Block` in bits. If we're using `multiblock<Block,K'>`, we have

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}_\text{multiblock}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbkk'/m) \cdot \text{FPR}(i,b,1)^{k'}\right)^{k}.]{small-end}

 As we have commented xref:primer_multiblock_filters[before], in general 

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}_\text{block}(n,m,b,k,k') \geq \text{FPR}_\text{multiblock}(n,m,b,k,k') \geq \text{FPR}(n,m,kk'),]{small-end}

 that is, block and multiblock filters have worse FPR than the classical filter for the same number of bits
 set per insertion, but they will be faster. We have the particular case

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,1)=\text{FPR}_{\text{multiblock}}(n,m,b,k,1)=\text{FPR}(n,m,k),]{small-end}

 which follows simply from the observation that using `{block|multiblock}<Block, 1>` behaves exactly as
 a classical Bloom filter.

 We don't know of any closed, simple formula for the FPR of block and multiblock filters when
-`Bucketsize` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
+`Stride` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
 that is, when subfilter subarrays overlap.
-We can use the following approximations ({small}stem:[s]{small-end} = `BucketSize` in bits):
+We can use the following approximations ({small}stem:[s]{small-end} = `Stride` in bits):

-[.text-center]
+[.formula-center]
 {small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2b-s)k}{m}\right) \cdot \text{FPR}(i,2b-s,k')\right)^{k},]{small-end} +
 {small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2bk'-s)k}{m}\right) \cdot \text{FPR}\left(i,\frac{2bk'-s}{k'},1\right)^{k'}\right)^{k},]{small-end}

--- a/doc/bloom/future_work.adoc
+++ b/doc/bloom/future_work.adoc
@@ -0,0 +1,71 @@
+[#future_work]
+= Future Work
+
+:idprefix: future_work_
+
+A number of features requested by reviewers and users of Boost.Bloom are
+considered for inclusion into future versions of the library. 
+
+== Bulk operations
+
+Each insertion/lookup operation for `boost::bloom::filter` likely involves one or more
+cache misses in the access to the internal bit array. Following a similar
+approach to that of
+https://bannalia.blogspot.com/2023/10/bulk-visitation-in-boostconcurrentflatm.html[bulk visitation^]
+in Boost.Unordered, we can pipeline several operations so that
+cache miss stalls are leveraged to do useful computation. The interface
+for this functionality could be as follows:
+
+[source]
+-----
+f.insert(first1, last1);
+f.may_contain(first2, last2, [] (const value_type& x, bool res) {
+  // x is (likely) in the filter if res == true
+});
+-----
+
+== `try_insert`
+
+To avoid inserting an already present element, we now have to do:
+
+[source]
+-----
+if(!f.may_contain(x)) f.insert(x);
+-----
+
+These two calls can be combined in a potentially faster,
+single operation:
+
+[source]
+-----
+bool res = f.try_insert(x); // returns true if x was not present
+-----
+
+== Estimation of number of elements inserted
+
+For a classical Bloom filter, the number of elements actually inserted
+can be estimated from the number {small}stem:[B]{small-end} of bits set
+to one in the array as
+
+[.formula-center]
+{small}stem:[n\approx-\displaystyle\frac{m}{k}\ln\left(1-\displaystyle\frac{B}{m}\right),]{small-end}
+
+which can be used for the implementation of a member function
+`estimated_size`. As of this writing, we don't know how to extend the
+formula to the case of block and multiblock filters. Any help on this
+problem is much appreciated.
+
+== Run-time specification of _k_
+
+Currently, the number _k_ of bits set per operation is configured at compile time.
+A variation of (or extension to) `boost::bloom::filter` can be provided
+where the value of _k_ is specified at run-time, the tradeoff being that
+its performance will be worse than the static case (preliminary experiments
+show an increase in execution time of around 10-20%).
+
+== Alternative filters
+
+We can consider adding additional data structures such as
+https://en.wikipedia.org/wiki/Cuckoo_filter[cuckoo^] and
+https://arxiv.org/pdf/1912.08258[xor^] filters, which are more
+space efficient and potentially faster.
--- a/doc/bloom/implementation_notes.adoc
+++ b/doc/bloom/implementation_notes.adoc
@@ -8,7 +8,7 @@
 This is the bit-mixing post-process we use to improve the statistical properties
 of the hash function when it doesn't have the avalanching property:

-[.text-center]
+[.formula-center]
 {small}stem:[m\leftarrow\text{mul}(h,C)]{small-end}, +
 {small}stem:[h'\leftarrow\text{high}(m)\text{ xor }\text{low}(m)]{small-end},

@@ -37,7 +37,7 @@ show how to relax this requirement down to two different hash functions
 {small}stem:[h_1(x)]{small-end} and {small}stem:[h_2(x)]{small-end} linearly
 combined as

-[.text-center]
+[.formula-center]
 {small}stem:[g_i(x)=h_1(x)+ih_2(x).]{small-end}

 Without formal justification, we have relaxed this even further to just one
@@ -47,32 +47,35 @@ by means of very cheap mixing schemes. In what follows
 {small}stem:[k]{small-end}, {small}stem:[k']{small-end} are the homonym values
 in a filter of the form `boost::bloom::filter<T, K, {block|multiblock}<Block, K'>>`,
 {small}stem:[b]{small-end} is `sizeof(Block) * CHAR_BIT`,
-and {small}stem:[r]{small-end} is the number of buckets in the filter.
+and {small}stem:[r]{small-end} is the number of subarrays in the filter.

-=== Bucket Location
+=== Subarray Location

 To produce a location (i.e. a number {small}stem:[p]{small-end} in {small}stem:[[0,r)]{small-end}) from
 {small}stem:[h_{i-1}]{small-end}, instead of the straightforward but costly
 procedure {small}stem:[p\leftarrow h_{i-1}\bmod r]{small-end} we resort to
-Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]. Moreover,
-we combine this calculation with the production of {small}stem:[h_{i}]{small-end}
-from {small}stem:[h_{i-1}]{small-end} as follows:
+Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]:

-[.text-center]
+[.formula-center]
 {small}stem:[m\leftarrow\text{mul}(h_{i-1},r),]{small-end} +
-{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m),]{small-end} +
-{small}stem:[h_i\leftarrow m \bmod 2^{64}=\text{low}(m).]{small-end}
+{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m).]{small-end}

+To decorrelate {small}stem:[p]{small-end} from further uses of the hash value,
+we produce {small}stem:[h_{i}]{small-end} from {small}stem:[h_{i-1}]{small-end} as
+
+[.formula-center]
+{small}stem:[h_i\leftarrow c \cdot h_{i-1} \bmod 2^{64}=\text{low}(c \cdot h_{i-1}),]{small-end}
+
+with {small}stem:[c=\text{0xf1357aea2e62a9c5}]{small-end} (64-bit mode),
+{small}stem:[c=\text{0xe817fb2d}]{small-end} (32-bit mode) obtained
+from https://arxiv.org/pdf/2001.05304[Steele and Vigna^].
 The transformation {small}stem:[h_{i-1} \rightarrow h_i]{small-end} is
 a simple https://en.wikipedia.org/wiki/Linear_congruential_generator[multiplicative congruential generator^]
 over {small}stem:[2^{64}]{small-end}. For this MCG to produce long
-cycles, {small}stem:[h_0]{small-end} must be odd and the multiplicative constant
-{small}stem:[r]{small-end} must be {small}stem:[\equiv \pm 3 \text{ (mod 8)}]{small-end}:
-to meet these requirements, the implementation adjusts {small}stem:[h_0]{small-end}
-to {small}stem:[h_0']{small-end} and {small}stem:[r]{small-end}
-to {small}stem:[r']{small-end}. This renders the least significant bit
-of {small}stem:[h_i]{small-end} unsuitable for pseudorandomization
-(it is always one).
+cycles, {small}stem:[h_0]{small-end} must be odd, so the implementation adjusts
+{small}stem:[h_0]{small-end} to {small}stem:[h_0'= (h_0\text{ or }1)]{small-end},
+which renders the least significant bit of {small}stem:[h_i]{small-end}
+unsuitable for pseudorandomization (it is always one).

 === Bit selection

@@ -109,14 +112,14 @@ In the case of SSE2, we don't have the 128-bit equivalent of
 `+++_+++mm256_sllv_epi32`, so we use the following, mildly interesting
 technique: a `+++__+++m128i` of the form

-[.text-center]
+[.formula-center]
 {small}stem:[((x_0+127)\cdot 2^{23},(x_1+127)\cdot 2^{23},(x_2+127)\cdot 2^{23},(x_3+127)\cdot 2^{23}),]{small-end}

 where each {small}stem:[x_i]{small-end} is in {small}stem:[[0,32)]{small-end},
 can be `reinterpret_cast`+++ed+++ to (i.e., has the same binary representation as)
 the `+++__+++m128` (register of `float`+++s+++)

-[.text-center]
+[.formula-center]
 {small}stem:[(2^{x_0},2^{x_1},2^{x_2},2^{x_3}),]{small-end}

 from which our desired `+++__+++m128i` of shifted 1s can be obtained
--- a/doc/bloom/intro.adoc
+++ b/doc/bloom/intro.adoc
@@ -8,10 +8,11 @@ that can be configured to implement a classical Bloom filter as well as
 variations discussed in the literature such as block filters, multiblock filters,
 and more.

-[listing,subs="+macros,+quotes"]
+[source,subs="+macros,+quotes"]
 -----
-#include <boost/bloom/filter.hpp>
+#include <boost/bloom.hpp>
 #include <cassert>
+#include <iostream>
 #include <string>

 int main()
@@ -25,7 +26,6 @@ int main()
  // insert elements (they can't be erased, Bloom filters are insert-only)
  f.insert("hello");
  f.insert("Boost");
-  //...

  // elements inserted are always correctly checked as such
  assert(f.may_contain("hello") == true);
@@ -35,7 +35,10 @@ int main()
  // the number of bits set per element and generally how the boost::bloom::filter
  // was specified
  if(f.may_contain("bye")) { // likely false
-    //...
+    std::cout << "false positive\n";
+  }
+  else {
+    std::cout << "everything worked as expected\n";
  }
 }
 -----
@@ -46,4 +49,20 @@ Boost.Bloom has been implemented with a focus on performance;
 SIMD technologies such as AVX2, Neon and SSE2 can be leveraged to speed up
 operations.

-Boost.Bloom is a header-only library. C++11 or later required.
+== Getting Started
+
+Consult the website
+https://www.boost.org/doc/user-guide/getting-started.html[section^]
+on how to install the entire Boost project or only Boost.Bloom
+and its dependencies.
+
+Boost.Bloom is a header-only library, so no additional build phase is
+needed. C++11 or later required. The library has been verified to
+work with GCC 4.8, Clang 3.9 and Visual Studio 2015 (and later versions
+of those). You can check that your environment is correctly set up
+by compiling the 
+link:../../example/basic.cpp[example program] shown above.
+
+If you are not familiar with Bloom filters in general, see the
+xref:primer[primer]; otherwise, you can jump directly to the
+xref:tutorial[tutorial].
--- a/doc/bloom/primer.adoc
+++ b/doc/bloom/primer.adoc
@@ -3,12 +3,20 @@

 :idprefix: primer_

-A Bloom filter is a probabilistic data structure where inserted elements can be looked up
-with 100% accuracy, whereas looking up for a non-inserted element may fail with
-some probability called the filter's _false positive rate_ or FPR. The tradeoff here is
-that Bloom filters occupy much less space than traditional non-probabilistic containers
-(typically, around 8-20 bits per element) for an acceptably low FPR. The greater
-the filter's _capacity_ (its size in bits), the lower the resulting FPR.
+A Bloom filter (named after its inventor Burton Howard Bloom) is a probabilistic data
+structure where inserted elements can be looked up with 100% accuracy, whereas looking
+up a non-inserted element may fail with some probability called the filter's
+_false positive rate_ or FPR. The tradeoff here is that Bloom filters occupy much less
+space than traditional non-probabilistic containers (typically, around 8-20 bits per
+element) for an acceptably low FPR. The greater the filter's _capacity_ (its size in bits),
+the lower the resulting FPR.
+
+In general, Bloom filters are useful to prevent/mitigate queries against large data sets
+when exact retrieval is costly and/or can't be made in main memory.
+
+[.boxed]
+====
+*Example: Speeding up unsuccessful requests to a database*

 One prime application of Bloom filters and similar data structures is for the prevention
 of expensive disk/network accesses when these would fail to retrieve a given piece of
@@ -18,7 +26,7 @@ For instance, suppose we are developing a frontend for a database with access ti
 Inserting a Bloom filter with a lookup time of 200 ns and a FPR of 0.5% will reduce the
 average response time of the system from 10 ms to

-[.text-center]
+[.formula-center]
 (10 + 0.0002) &times; 50.25% + 0.0002 &times; 49.75% &cong; 5.03 ms, 

 that is, we get a &times;1.99 overall speedup. If the database holds 1 billion records,
@@ -27,8 +35,8 @@ which is perfectly realizable.

 image::db_speedup.png[align=center, title="Improving DB negative access time with a Bloom filter."]

-In general, Bloom filters are useful to prevent/mitigate queries against large data sets
-when exact retrieval is costly and/or can't be made in main memory.
+====
+
 Applications have been described in the areas of web caching,
 dictionary compression, network routing and genomics, among others.
 https://www.eecs.harvard.edu/~michaelm/postscripts/im2005b.pdf[Broder and Mitzenmacher^]
@@ -36,17 +44,17 @@ provide a rather extensive review of use cases with a focus on networking.

 == Implementation

-The implementation of a Bloom filter consists of an array of _m_ bits, initially set to zero.
+The implementation of a classical Bloom filter consists of an array of _m_ bits, initially set to zero.
 Inserting an element _x_ reduces to selecting _k_ positions pseudorandomly (with the help
 of _k_ independent hash functions) and setting them to one.

-image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter, _k_ = 6."]
+image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter with _k_ = 6 different hash functions. Inserting _x_ reduces to setting to one the bits at positions 10, 14, 43, 58, 1, and 39 as indicated by _h_~1~(_x_), ... , _h_~6~(_x_)."]

 To check if an element _y_ is in the filter, we follow the same procedure and see if
 the selected bits are all set to one. In the example figure there are two unset bits, which
 definitely indicates _y_ was not inserted in the filter.

-image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter."]
+image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter. Value _y_ is not in the filter because bits at positions 20 and 61 are not set to one."]

 A false positive occurs when the bits checked happen to be all set to one due to
 other, unrelated insertions. The probability of having a false positive increases as we
@@ -57,19 +65,19 @@ when the array is sparsely populated, a higher value of _k_ improves (decreases)
 as there are more chances that we hit a non-set bit; however, if _k_ is very high
 the array will have more and more bits set to one as new elements are inserted, which
 eventually will reach a point where we lose out to a filter with a lower _k_ and
-thus a smaller proportions of set bits.
+thus a smaller proportion of set bits. For given values of _n_ and _m_, the optimum _k_ is
+{small}stem:[\lfloor k_{\text{opt}}\rfloor]{small-end} or
+{small}stem:[\lceil k_{\text{opt}}\rceil]{small-end}, with

-image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits."]
+[.formula-center]
+{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n},]{small-end}

-For given values of _n_ and _m_, the optimum _k_ is the integer closest to
-
-[.text-center]
-{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n}]{small-end}
-
-for a minimum FPR of
+for a minimum FPR close to
 {small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{m/n}]{small-end}. See the appendix
 on xref:fpr_estimation[FPR estimation] for more details.

+image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits. _k_ = 6 (red) has a better (lower) FPR than _k_ = 2 (blue) for small values of _n_, but eventually degrades more as _n_ grows. The dotted line shows the minimum attainable FPR resulting from selecting the optimum value of _k_ for each value of _n_."]
+
 == Variations on the Classical Filter

 === Block Filters
@@ -82,21 +90,21 @@ setting/checking in a small block of _b_ bits pseudorandomly selected from the
 entire array. If the block is small enough, it will fit in a CPU cacheline,
 thus drastically reducing the number of cache misses.

-image::block_insertion.png[align=center, title="Block filter."]
+image::block_insertion.png[align=center, title="Block filter. A block of _b_ bits is selected based on _h_~0~(_x_), and all subsequent bit setting is constrained there."]

 The downside is that the resulting FPR is worse than that of a classical filter for
 the same values of _n_, _m_ and _k_. Intuitively, block filters reduce the
 uniformity of the distribution of bits in the array, which ultimately hurts their
 probabilistic performance.

-image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter, _m_ = 10^5^ bits."]
+image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter with the same _k_ = 4, _m_ = 10^5^ bits."]

 A further variation in this idea is to have operations select _k_ blocks
 with _k'_ bits set on each. This, again, will have a worse FPR than a classical
 filter with _k&middot;k'_ bits per operation, but improves on a plain
 _k&middot;k'_ block filter.

-image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion."]
+image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion. _k_ = 2 blocks are selected, and _k_' = 3 bits are set in each."]

 === Multiblock Filters

@@ -106,7 +114,7 @@ so that each block takes exactly one bit. This still maintains a good cache
 locality but improves FPR with respect to block filters because bits set to one
 are more spread out across the array.

-image::multiblock_insertion.png[align=center, title="Multiblock filter."]
+image::multiblock_insertion.png[align=center, title="Multiblock filter. A range of _k_' = 4 consecutive blocks is selected based on _h_~0~(_x_), and a bit is set to one in each of the blocks."]

 Multiblock filters can also be combined with multi-insertion. In general,
 for the same number of bits per operation and equal values of _n_ and _m_,
--- a/doc/bloom/reference.adoc
+++ b/doc/bloom/reference.adoc
@@ -1,6 +1,7 @@
 [#reference]
 = Reference

+include::reference/header_bloom.adoc[]
 include::reference/header_filter.adoc[]
 include::reference/filter.adoc[]
 include::reference/subfilters.adoc[]
--- a/doc/bloom/reference/block.adoc
+++ b/doc/bloom/reference/block.adoc
@@ -34,9 +34,11 @@ struct block
 |===

 |`Block`
-|An unsigned integral type.
+|An unsigned integral type or an array of 2^`N`^ elements of unsigned integral type.

 |`K`
 | Number of bits set/checked per operation. Must be greater than zero.

 |===
+
+'''
--- a/doc/bloom/reference/fast_multiblock32.adoc
+++ b/doc/bloom/reference/fast_multiblock32.adoc
@@ -50,3 +50,5 @@ The non-SIMD case falls back to regular `multiblock`.

 `xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock32<K>>` is
 `4 * K`.
+
+'''
--- a/doc/bloom/reference/filter.adoc
+++ b/doc/bloom/reference/filter.adoc
@@ -34,8 +34,9 @@ namespace bloom{

 template<
  typename T, std::size_t K,
-  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
-  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
+  typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
+  typename Hash = boost::hash<T>,
+  typename Allocator = std::allocator<unsigned char>
 >
 class filter
 {
@@ -44,7 +45,7 @@ public:
  using value_type                         = T;
  static constexpr std::size_t k           = K;
  using subfilter                          = Subfilter;
-  static constexpr std::size_t xref:filter_bucket_size[bucket_size] = xref:filter_bucket_size[__see below__];
+  static constexpr std::size_t xref:filter_stride[stride]      = xref:filter_stride[__see below__];
  using hasher                             = Hash;
  using allocator_type                     = Allocator;
  using size_type                          = std::size_t;
@@ -120,8 +121,6 @@ public:
  boost::span<const unsigned char> xref:#filter_array[array]() const noexcept;

  // modifiers
-  template<typename... Args>
-    void xref:#filter_emplace[emplace](Args&&... args);
  void xref:#filter_insert[insert](const value_type& x);
  template<typename U>
    void xref:#filter_insert[insert](const U& x);
@@ -172,39 +171,52 @@ bit setting/checking into the filter's internal array. The subfilter is invoked
 per operation on `K` pseudorandomly selected portions of the array (_subarrays_) of width
 `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`.

-|`BucketSize`
+|`Stride`
 | Distance in bytes between the initial positions of consecutive subarrays.
-If `BucketSize` is specified as zero, the actual distance is automatically selected to
+If `Stride` is specified as zero, the actual distance is automatically selected to
 `_used-value-size_<Subfilter>` (non-overlapping subarrays).
-Otherwise, `BucketSize` must be not greater than `_used-value-size_<Subfilter>`.
+Otherwise, `Stride` must be not greater than `_used-value-size_<Subfilter>`.

 |`Hash`
 |A https://en.cppreference.com/w/cpp/named_req/Hash[Hash^] type over `T`.

 |`Allocator`
-|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is `T`.
+|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is
+`unsigned char`.

 |===

 Allocation and deallocation of the internal array is done through an internal copy of the
-provided allocator. `value_type` construction/destruction (which only happens in
-`xref:filter_emplace[emplace]`) uses
-`std::allocator_traits<Allocator>::construct`/`destroy`.
+provided allocator. If `xref:filter_stride[stride]` is a
+multiple of _a_ = `alignof(Subfilter::value_type)`, the array is byte-aligned to
+max(64, _a_).

-If `link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]<Hash>::value`
+If `link:../../../container_hash/doc/html/hash.html#ref_hash_is_avalanchinghash[boost::hash_is_avalanching]<Hash>::value`
 is `true` and `sizeof(std::size_t) >= 8`, 
 the hash function is used as-is; otherwise, a bit-mixing post-processing stage
 is added to increase the quality of hashing at the expense of extra computational cost.

+*Exception Safety Guarantees*
+
+Except when explicitly noted, all non-const member functions and associated functions taking
+`boost::bloom::filter` by non-const reference provide the
+https://en.cppreference.com/w/cpp/language/exceptions#Exception_safety[basic exception guarantee^],
+whereas all const member functions and associated functions taking
+`boost::bloom::filter` by const reference provide the 
+https://en.cppreference.com/w/cpp/language/exceptions#Exception_safety[strong exception guarantee^].
+
+Except when explicitly noted, no operation throws an exception unless that exception
+is thrown by the filter's `Hash` or `Allocator` object (if any).
+
 === Types and Constants

-[[filter_bucket_size]]
+[[filter_stride]]
 [listing,subs="+macros,+quotes"]
 ----
-static constexpr std::size_t bucket_size;
+static constexpr std::size_t stride;
 ----

-Equal to `BucketSize` if that parameter was specified as distinct from zero.
+Equal to `Stride` if that parameter was specified as distinct from zero.
 Otherwise, equal to `xref:subfilters_used_value_size[_used-value-size_]<subfilter>`.

 === Constructors
@@ -236,6 +248,7 @@ filter(
 Constructs an empty filter using copies of `h` and `al` as the hash function and allocator, respectively.

 [horizontal]
+Preconditions:;; `fpr` is between 0.0 and 1.0.
 Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
 `capacity() == capacity_for(n, fpr)` (second overload).

@@ -259,7 +272,8 @@ and inserts the values from `[first, last)` into it.

 [horizontal]
 Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
-`[first, last)` is a valid range.
+`[first, last)` is a valid range. +
+`fpr` is between 0.0 and 1.0.
 Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
 `capacity() == capacity_for(n, fpr)` (second overload). +
 `may_contain(x)` for all values `x` from `[first, last)`.
@@ -366,7 +380,6 @@ filter(
 Equivalent to `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), m, h, al)` (first overload)
 or `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), n, fpr, h, al)` (second overload).

-
 ==== Capacity Constructor with Allocator

 [listing,subs="+macros,+quotes"]
@@ -378,7 +391,6 @@ filter(size_type n, double fpr, const allocator_type& al);
 Equivalent to `xref:#filter_capacity_constructor[filter](m, hasher(), al)` (first overload)
 or `xref:#filter_capacity_constructor[filter](n, fpr, hasher(), al)` (second overload).

-
 ==== Initializer List Constructor with Allocator

 [listing,subs="+macros,+quotes"]
@@ -425,6 +437,7 @@ Preconditions:;; If `pocca`,
 `hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
 Postconditions:;; `*this == x`.
 Returns:;; `*this`.
+Exception Safety:;; Strong.

 ==== Move Assignment

@@ -449,6 +462,7 @@ Preconditions:;; If `pocma`,
 `hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
 Postconditions:;; `x.capacity() == 0`.
 Returns:;; `*this`.
+Exception Safety:;; Nothrow as indicated, otherwise strong.

 ==== Initializer List Assignment

@@ -520,20 +534,6 @@ Returns:;; A span over the internal array.

 === Modifiers

-==== Emplace
-
-[listing,subs="+macros,+quotes"]
----
-template<typename... Args> void emplace(Args&&... args);
----
-
-Inserts an element constructed from `std::forward<Args>(args)+++...+++`.
-
-[horizontal]
-Preconditions:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^]
-into `filter` from `std::forward<Args>(args)+++...+++`. +
-`value_type` is https://en.cppreference.com/w/cpp/named_req/Erasable[Erasable^] from `filter`.
-
 ==== Insert

 [listing,subs="+macros,+quotes"]
@@ -548,6 +548,7 @@ bits of the internal array deterministically selected from the value

 [horizontal]
 Postconditions:;; `may_contain(x)`.
+Exception Safety:;; Strong.
 Notes:;; The second overload only participates in overload resolution if
 `hasher::is_transparent` is a valid member typedef.

@@ -591,7 +592,7 @@ If `pocs`, swaps the internal allocator with that of `x`.
 Preconditions:;; `pocs || get_allocator() == x.get_allocator()`. +
 If `pocs`, `Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^]. +
 `hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
-
+Exception Safety:;; Nothrow.

 ==== Clear

@@ -615,8 +616,10 @@ equal to `capacity()`, and clears the filter. +
 Second overload: Equivalent to `reset(capacity_for(n, fpr))`.

 [horizontal]
+Preconditions:;; `fpr` is between 0.0 and 1.0.
 Postconditions:;; In general, `capacity() >= m`. +
 If `m == 0` or `m == capacity()` or `m == capacity_for(n, fpr)` for some `n` and `fpr`, then `capacity() == m`.
+Exception Safety:;; If `m == 0` or `capacity_for(n, fpr) == 0`, nothrow, otherwise strong.

 ==== Combine with AND

@@ -630,7 +633,9 @@ otherwise, changes the value of each bit in the internal array with the result o
 doing a logical AND operation of that bit and the corresponding one in `x`.

 [horizontal]
+Preconditions:;; The `Hash` objects of `*this` and `x` are equivalent.
 Returns:;; `*this`;
+Exception Safety:;; Strong.

 ==== Combine with OR

@@ -644,7 +649,9 @@ otherwise, changes the value of each bit in the internal array with the result o
 doing a logical OR operation of that bit and the corresponding one in `x`.

 [horizontal]
+Preconditions:;; The `Hash` objects of `*this` and `x` are equivalent.
 Returns:;; `*this`;
+Exception Safety:;; Strong.

 === Observers

@@ -698,6 +705,7 @@ bool operator==(
 ----

 [horizontal]
+Preconditions:;; The `Hash` objects of `x` and `y` are equivalent.
 Returns:;; `true` iff `x.capacity() == y.capacity()` and 
 `x`++'++s and `y`++'++s internal arrays are bitwise identical.

@@ -713,6 +721,7 @@ bool operator!=(
 ----

 [horizontal]
+Preconditions:;; The `Hash` objects of `x` and `y` are equivalent.
 Returns:;; `!(x xref:filter_operator[==] y)`.


@@ -728,3 +737,5 @@ void swap(filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
 ----

 Equivalent to `x.xref:filter_swap[swap](y)`.
+
+'''
--- a/doc/bloom/reference/header_bloom.adoc
+++ b/doc/bloom/reference/header_bloom.adoc
@@ -0,0 +1,9 @@
+[#header_bloom]
+== `<boost/bloom.hpp>`
+
+:idprefix: header_bloom_
+
+Convenience header including all the other headers listed in this
+reference.
+
+'''
--- a/doc/bloom/reference/header_filter.adoc
+++ b/doc/bloom/reference/header_filter.adoc
@@ -13,27 +13,28 @@ namespace bloom{

 template<
  typename T, std::size_t K,
-  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
-  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
+  typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
+  typename Hash = boost::hash<T>, 
+  typename Allocator = std::allocator<unsigned char>
 >
 class xref:filter[filter];

 template<
-  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+  typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
 >
 bool xref:filter_operator[operator+++==+++](
-  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+  const filter<T, K, SF, S, H, A>& x, const filter<T, K, SF, S, H, A>& y);

 template<
-  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+  typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
 >
 bool xref:filter_operator_2[operator!=](
-  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+  const filter<T, K, SF, S, H, A>& x, const filter<T, K, SF, S, H, A>& y);

 template<
-  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+  typename T, std::size_t K, typename SF, std::size_t S, typename H, typename A
 >
-void xref:filter_swap_2[swap](filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
+void xref:filter_swap_2[swap](filter<T, K, SF, S, H, A>& x, filter<T, K, SF, S, H, A>& y)
  noexcept(noexcept(x.swap(y)));

 } // namespace bloom
--- a/doc/bloom/reference/multiblock.adoc
+++ b/doc/bloom/reference/multiblock.adoc
@@ -34,7 +34,7 @@ struct multiblock
 |===

 |`Block`
-|An unsigned integral type.
+|An unsigned integral type or an array of 2^`N`^ elements of unsigned integral type.

 |`K`
 | Number of bits set/checked per operation. Must be greater than zero.
@@ -43,3 +43,5 @@ struct multiblock

 Each of the `K` bits set/checked is located in a different element of the
 `Block[K]` array.
+
+'''
--- a/doc/bloom/reference/subfilters.adoc
+++ b/doc/bloom/reference/subfilters.adoc
@@ -55,3 +55,5 @@ constexpr std::size_t _used-value-size_; // exposition only
 constant exists, or `sizeof(Subfilter::value_type)` otherwise.
 The value is the effective size in bytes of the subarrays upon which a
 given subfilter operates.
+
+'''
--- a/doc/bloom/release_notes.adoc
+++ b/doc/bloom/release_notes.adoc
@@ -3,7 +3,7 @@

 :idprefix: release_notes_

-== Boost 1.xx
+== Boost 1.89

 * Initial release.

--- a/doc/bloom/tutorial.adoc
+++ b/doc/bloom/tutorial.adoc
@@ -3,81 +3,127 @@

 :idprefix: tutorial_

-== Filter Definition
-
-A `boost::bloom::filter` can be regarded as a bit array divided into _buckets_ that
+A `boost::bloom::filter` can be regarded as a bit array divided into _subarrays_ that
 are selected pseudo-randomly (based on a hash function) upon insertion:
-each of the buckets is passed to a _subfilter_ that marks several of its bits according
+each of the subarrays is passed to a _subfilter_ that marks several of its bits according
 to some associated strategy.

+Note that although `boost::bloom::filter` mimics the interface of a container
+and provides operations such as `insert`, it is actually _not_ a
+container: for instance, insertion does not involve the actual storage
+of the element in the data structure, but merely sets some bits in the internal
+array based on the hash value of the element.
+
+== Filter Definition
+
 [listing,subs="+macros,+quotes"]
 -----
 template<
  typename T, std::size_t K,
-  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
-  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>  
+  typename Subfilter = block<unsigned char, 1>, std::size_t Stride = 0,
+  typename Hash = boost::hash<T>,
+  typename Allocator = std::allocator<unsigned char>  
 >
 class filter;
 -----

 * `T`: Type of the elements inserted.
-* `K`: Number of buckets marked per insertion.
+* `K`: Number of subarrays marked per insertion.
 * `xref:tutorial_subfilter[Subfilter]`: Type of subfilter used.
-* `xref:tutorial_bucketsize[BucketSize`]: Size in bytes of the buckets.
+* `xref:tutorial_stride[Stride]`: Distance in bytes between the initial positions of consecutive subarrays.
 * `xref:tutorial_hash[Hash]`: A hash function for `T`.
-* `Allocator`: An allocator for `T`.
+* `Allocator`: An allocator for `unsigned char`.

 === `Subfilter`

-The following subfilters can be selected, offering different compromises
-between performance and _false positive rate_ (FPR).
-See the xref:primer_variations_on_the_classical_filter[Bloom Filter Primer]
-for a general explanation of block and multiblock filters.
+A subfilter defines the local strategy for setting or checking bits within
+a selected subarray of the bit array. It determines how many bits are
+modified per operation, how they are arranged in memory, and how memory is accessed.
+The following subfilters are provided:

-`block<Block, K'>`
++++
+<div style="overflow-x: auto;">
++++
+[options="header"]
+|===
+| Subfilter | Description | Pros | Cons

-[.indent]
-Sets `K'` bits in an underlying value of the unsigned integral type `Block`
-(e.g. `unsigned char`, `uint32_t`, `uint64_t`). So,
-a `filter<T, K, block<Block, K'>>` will set `K * K'` bits per element.
-The tradeoff here is that insertion/lookup will be (much) faster than
-with `filter<T, K * K'>` while the FPR will be worse (larger).
-FPR is better the wider `Block` is.
+| `block<Block, K'>`
+| Sets `K'` bits in a subarray of type `Block`
+| Very fast access time
+| FPR is worse (higher) the smaller `Block` is

-`multiblock<Block, K'>`
+| `multiblock<Block, K'>`
+| Sets one bit in each of the elements of a `Block[K']` subarray
+| Better (lower) FPR than `block<Block, K'>` for the same `Block` type
+| Performance may worsen if cacheline boundaries are crossed when accessing the subarray

-[.indent]
-Instead of setting `K'` bits in a `Block` value, this subfilter sets
-one bit on each of the elements of a `Block[K']` subarray. This improves FPR
-but impacts performance with respect to `block<Block, K'>`, among other
-things because cacheline boundaries can be crossed when accessing the subarray.
+| `fast_multiblock32<K'>`
+| Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
+faster SIMD-based algorithms when SSE2, AVX2 or Neon are enabled at
+compile time
+| Always prefer it to `multiblock<uint32_t, K'>` when SSE2/AVX2/Neon is available
+| FPR is worse (higher) than `fast_multiblock64<K'>` for the same `K'`

-`fast_multiblock32<K'>`
+| `fast_multiblock64<K'>`
+| Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
+faster SIMD-based algorithm when AVX2 is enabled at compile time
+| Always prefer it to `multiblock<uint64_t, K'>` when AVX2 is available
+| Slower than `fast_multiblock32<K'>` for the same `K'`
+|===
++++
+</div>
++++

-[.indent]
-Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
-faster SIMD-based algorithms when SSE2, AVX2 or Neon are available.
+In the table above, `Block` can be an unsigned integral type
+(e.g. `unsigned char`, `uint32_t`, `uint64_t`), or
+an array of 2^`N`^ unsigned integrals (e.g. `uint64_t[8]`). In general,
+the wider `Block` is, the better (lower) the resulting FPR, but
+cache locality worsens and performance may suffer as a result.

-`fast_multiblock64<K'>`
+Note that the total number of bits set/checked for a
+`boost::bloom::filter<T, K, _subfilter_<..., K'>>` is `K * K'`. The
+default configuration `boost::bloom::filter<T, K>` = 
+`boost::bloom::filter<T, K, block<unsigned char, 1>>`, which corresponds to a
+xref:primer_implementation[classical Bloom filter], has the best (lowest) FPR among all filters
+with the same number of bits per operation, but is also the slowest: a new
+subarray is accessed for each bit set/checked. Consult the
+xref:benchmarks[benchmarks section] to see different tradeoffs between FPR and
+performance.

-[.indent]
-Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
-faster SIMD-based algorithm when AVX2 is available.
+Once a subfilter has been selected, the parameter `K'` can be tuned
+to its optimum value (minimum FPR) if the number of elements that will be inserted is
+known in advance, as explained in a xref:configuration[dedicated section];
+otherwise, low values of `K'` will generally be faster and preferred to
+higher values as long as the resulting FPR is at acceptable levels.

-The default configuration with `block<unsigned char,1>` corresponds to a
-xref:primer[classical Bloom filter] setting `K` bits per element uniformly
-distributed across the array.
+=== `Stride`

-=== `BucketSize`
+As we have seen, `Subfilter` defines the subarray (`Block` in the case of
+`block<Block, K'>`, `Block[K']` for `multiblock<Block, K'>`) used by
+`boost::bloom::filter`: contiguous portions of the underlying bit array
+are then accessed and treated as those subarrays. The `Stride` parameter
+controls the distance in bytes between the initial positions of
+consecutive subarrays.

-When the default value 0 is used, buckets have the same size as
-the _subarrays_ subfilters operate on (non-overlapping case).
-Otherwise, bucket size is smaller and subarrays spill over adjacent buckets,
-which results in an improved (lower) FPR in exchange for a possibly
-worse performance due to memory unalignment.
+When the default value 0 is used, the stride is automatically set
+to the size of the subarrays, and so there's no overlapping between them.
+If `Stride` is set to a smaller value than that size, contiguous
+subarrays superimpose on one another: the level of overlap is larger
+for smaller values of `Stride`, with maximum overlap happening when
+`Stride` is 1 byte.
+
+image::stride.png[align=center, title="Two different configurations of `Stride`: (a) non-overlapping subarrays, (b) overlapping subarrays.+++<br/>+++Each subarray is associated to the stride of the same color."]
+
+As it happens, overlapping improves (decreases) the resulting FPR
+with respect to the non-overlapping case, the tradeoff being that
+subarrays may not be aligned in memory, which can impact performance
+negatively.

 === `Hash`

+Unlike other Bloom filter implementations requiring several hash functions per operation,
+`boost::bloom::filter` uses only one.
 By default, link:../../../container_hash/index.html[Boost.ContainerHash] is used.
 Consult this library's link:../../../container_hash/doc/html/hash.html#user[dedicated section]
 if you need to extend `boost::hash` for your own types.
@@ -87,16 +133,16 @@ as is; otherwise, a bit-mixing post-process is applied to hash values that impro
 their statistical properties so that the resulting FPR approaches its
 theoretical limit. The hash function is determined to be of high quality
 (more precisely, to have the so-called _avalanching_ property) via the
-`link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]`
+`link:../../../container_hash/doc/html/hash.html#ref_hash_is_avalanchinghash[boost::hash_is_avalanching]`
 trait.

 == Capacity

 The size of the filter's internal array is specified at construction time:

-[listing,subs="+macros,+quotes"]
+[source,subs="+macros,+quotes"]
 -----
-using filter = boost::bloom::filter<std::string, ...>;
+using filter = boost::bloom::filter<std::string, 8>;
 filter f(1'000'000); // array of 1'000'000 **bits**
 std::cout << f.capacity(); // >= 1'000'000
 -----
@@ -109,7 +155,7 @@ Instead of specifying the array's capacity directly, we can let the library
 figure it out based on the number of elements we plan to insert and the
 desired FPR:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 // we'll insert 100'000 elements and want a FPR ~ 1%
 filter f(100'000, 0.01);
@@ -118,11 +164,20 @@ filter f(100'000, 0.01);
 filter f2(filter::capacity_for(100'000, 0.01));
 -----

+Be careful when the FPR specified is very small, as the resulting capacity
+may be too large to fit in memory:
+
+[source]
+-----
+// resulting capacity ~ 1.4E12, out of memory std::bad_alloc is thrown
+filter f3(100'000, 1E-50);
+-----
+
 Once a filter is constructed, its array is fixed (for instance, it won't
 grow dynamically as elements are inserted). The only way to change it is
 by assignment/swapping from a different filter, or using `reset`:

-[listing,subs="+macros,+quotes"]
+[source,subs="+macros,+quotes"]
 -----
 f.reset(2'000'000); // change to 2'000'000 bits **and clears the filter**
 f.reset(100'000, 0.005); // equivalent to reset(filter::capacity_for(100'000, 0.005));
@@ -133,10 +188,9 @@ f.reset(); // null array (capacity == 0)

 Insertion is done in much the same way as with a traditional container:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 f.insert("hello");
-f.emplace(100, 'X'); // ~ insert(std::string(100, 'X'))
 f.insert(data.begin(), data.end());
 -----

@@ -145,7 +199,7 @@ storage of elements into the filter, but rather the setting of bits in the
 internal array based on the hash values of those elements.
 Lookup goes as follows:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 bool b1 = f.may_contain("hello"); // b1 is true since we actually inserted "hello"
 bool b2 = f.may_contain("bye"); // b2 is most likely false
@@ -156,7 +210,7 @@ element has not been previously inserted, that is, it may yield false
 positives -- this is the essence of probabilistic data structures.
 `fpr_for` provides an estimation of the false positive rate:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 // we have inserted 100 elements so far, what's our FPR?
 std::cout<< filter::fpr_for(100, f.capacity());
@@ -170,7 +224,7 @@ operation.
 Once inserted, there is no way to remove a specific element from the filter.
 We can only clear up the filter entirely:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 f.clear(); // sets all the bits in the array to zero
 -----
@@ -180,18 +234,18 @@ f.clear(); // sets all the bits in the array to zero
 `boost::bloom::filter`+++s+++ can be combined by doing the OR logical operation
 of the bits of their arrays:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 filter f2 = ...;
 ...
 f |= f2; // f and f2 must have exactly the same capacity
 -----

-The result is equivalent to a filter "containing" both the elements
+The result is equivalent to a filter "containing" the set union of the elements
 of `f` and `f2`. AND combination, on the other hand, results in a filter
 holding the _intersection_ of the elements:

-[listing,subs="+macros,+quotes"]
+[source]
 -----
 filter f3 = ...;
 ...
@@ -208,7 +262,7 @@ case.
 The contents of the bit array can be accessed directly with the `array`
 member function, which can be leveraged for filter serialization:

-[listing,subs="+quotes"]
+[source]
 -----
 filter f1 = ...;
 ...
@@ -216,25 +270,29 @@ filter f1 = ...;
 // save filter
 std::ofstream out("filter.bin", std::ios::binary);
 std::size_t c1=f1.capacity();
-out.write((const char*) &c1, sizeof(c1)); // save capacity (bits)
+out.write(reinterpret_cast<const char*>(&c1), sizeof(c1)); // save capacity (bits)
 boost::span<const unsigned char> s1 = f1.array();
-out.write((const char*) s1.data(), s1.size()); // save array
+out.write(reinterpret_cast<const char*>(s1.data()), s1.size()); // save array
 out.close();

 // load filter
 filter f2;
 std::ifstream in("filter.bin", std::ios::binary);
 std::size_t c2;
-in.read((char*) &c2, sizeof(c2));
+in.read(reinterpret_cast<char*>(&c2), sizeof(c2));
 f2.reset(c2); // restore capacity
 boost::span<unsigned char> s2 = f2.array();
-in.read((char*) s2.data(), s2.size()); // load array
+in.read(reinterpret_cast<char*>(s2.data()), s2.size()); // load array
 in.close();
 -----

 Note that `array()` is a span over `unsigned char`+++s+++ whereas
 capacities are measured in bits, so `array.size()` is
-`capacity() / CHAR_BIT`.
+`capacity() / CHAR_BIT`. If you load a serialized filter in a computer
+other than the one where it was saved, take into account that
+the CPU architectures at each end must have the same
+https://en.wikipedia.org/wiki/Endianness[endianness^] for the
+reconstruction to work.

 == Debugging

--- a/doc/img/bloom_insertion.png
+++ b/doc/img/bloom_insertion.png
--- a/doc/img/bloom_lookup.png
+++ b/doc/img/bloom_lookup.png
--- a/doc/img/db_speedup.png
+++ b/doc/img/db_speedup.png
--- a/doc/img/fpr_c.png
+++ b/doc/img/fpr_c.png
--- a/doc/img/fpr_n_k.png
+++ b/doc/img/fpr_n_k.png
--- a/doc/img/fpr_n_k_bk.png
+++ b/doc/img/fpr_n_k_bk.png
--- a/doc/img/stride.png
+++ b/doc/img/stride.png
--- a/doc/index.html
+++ b/doc/index.html
@@ -8,12 +8,12 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<meta http-equiv="refresh" content="0; URL=html/index.html">
+<meta http-equiv="refresh" content="0; URL=html/bloom.html">
 <title>Boost.Bloom Documentation</title>
 </head>

 <body>
 Automatic redirection failed, please go to
-<a href="html/index.html">html/index.html</a>
+<a href="html/bloom.html">html/bloom.html</a>
 </body>
 </html>
--- a/example/Jamfile.v2
+++ b/example/Jamfile.v2
@@ -5,12 +5,14 @@
 #
 # See http://www.boost.org/libs/bloom for library home page.

+import config : requires ;
+
 project
    : requirements
      # <library>/boost/bloom//boost_bloom
-      <cxxstd>11
+      [ requires cxx11_noexcept ] # used as a proxy for C++11 support
    ;

 exe basic : basic.cpp ;
-exe genome : genome.cpp : <cxxstd>17 ;
+exe genome : genome.cpp : [ requires cxx17_if_constexpr ] ;
 exe serialization : serialization.cpp ;
--- a/example/basic.cpp
+++ b/example/basic.cpp
@@ -8,7 +8,7 @@
 * See https://www.boost.org/libs/bloom for library home page.
 */

-#include <boost/bloom/filter.hpp>
+#include <boost/bloom.hpp>
 #include <cassert>
 #include <iostream>
 #include <string>
@@ -41,4 +41,7 @@ int main()
  if(f.may_contain("bye")) { /* likely false */
    std::cout << "false positive\n";
  }
+  else {
+    std::cout << "everything worked as expected\n";
+  }
 }
--- a/example/genome.cpp
+++ b/example/genome.cpp
@@ -11,8 +11,8 @@
 #include <array>
 #include <boost/bloom/filter.hpp>
 #include <boost/bloom/fast_multiblock32.hpp>
-#include <boost/cstdint.hpp>
 #include <cassert>
+#include <cstdint>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
@@ -29,7 +29,7 @@ struct k_mer
 {
  static_assert(
    K >= 0 &&
-    2 * K <= sizeof(boost::uint64_t) * CHAR_BIT);
+    2 * K <= sizeof(std::uint64_t) * CHAR_BIT);

  static constexpr std::size_t size()
  {
@@ -45,8 +45,8 @@ struct k_mer

  k_mer& operator+=(char n)
  {
-    static constexpr boost::uint64_t mask=
-      (((boost::uint64_t)1) << (2 * size())) - 1;
+    static constexpr std::uint64_t mask=
+      (((std::uint64_t)1) << (2 * size())) - 1;

    data <<= 2;
    data &= mask;
@@ -54,7 +54,7 @@ struct k_mer
    return *this;
  }

-  boost::uint64_t data = 0;
+  std::uint64_t data = 0;

  using table_type=std::array<unsigned char, UCHAR_MAX>;

@@ -71,20 +71,28 @@ struct k_mer
 template<std::size_t N>
 std::size_t hash_value(const k_mer<N>& km)
 {
-  if constexpr (sizeof(std::size_t) >= sizeof(boost::uint64_t)) {
+  /* k_mer::data is 8 bytes wide. We use it directly as the associated
+   * hash value in 64-bit mode, as std::size_t is the same size; in 32-bit
+   * mode, we XOR the high and low portions of data to make it fit into
+   * a std::size_t.
+   */
+
+  if constexpr (sizeof(std::size_t) >= sizeof(std::uint64_t)) {
    return (std::size_t)km.data;
  }
-  else{
+  else{ /* 32-bit mode */
    return (std::size_t)(km.data ^ (km.data >> 32));
  }
 }

 /* Insert all the k-mers of a given genome in a boost::bloom::filter.
 * Assumed format is FASTA with A, C, G, T.
+ * https://en.wikipedia.org/wiki/FASTA_format
 */

 using genome_filter = boost::bloom::filter<
-  k_mer<20>, 1, boost::bloom::fast_multiblock32<8> >;
+  k_mer<20>, /* using k-mers of length 20 */
+  1, boost::bloom::fast_multiblock32<8> >;

 genome_filter make_genome_filter(const char* filename)
 {
@@ -93,7 +101,11 @@ genome_filter make_genome_filter(const char* filename)
  std::ifstream in(filename, std::ios::ate); /* open at end to tell size */
  if(!in) throw std::runtime_error("can't open file");

-  /* number of k-mers ~ length of the genome, FPR = 1% */
+  /* As a rough estimation, we assume that the number of k-mers
+   * is approximately equal to the length of the genome --this is
+   * overpessimistic due to the likely presence of duplicate k-mers.
+   * We set FPR = 1%.
+   */

  genome_filter f((std::size_t)in.tellg(), 0.01);
  in.seekg(0);
--- a/example/serialization.cpp
+++ b/example/serialization.cpp
@@ -11,8 +11,8 @@
 #include <boost/bloom/filter.hpp>
 #include <boost/bloom/multiblock.hpp>
 #include <boost/core/detail/splitmix64.hpp>
-#include <boost/cstdint.hpp>
 #include <boost/uuid/uuid.hpp>
+#include <cstdint>
 #include <cstring>
 #include <fstream>
 #include <iostream>
@@ -24,7 +24,7 @@ struct uuid_generator
  boost::uuids::uuid operator()()
  {
    std::uint8_t    data[16];
-    boost::uint64_t x = rng();
+    std::uint64_t x = rng();
    std::memcpy(&data[0], &x, sizeof(x));
    x = rng();
    std::memcpy(&data[8], &x, sizeof(x));
@@ -36,7 +36,7 @@ struct uuid_generator
 };

 using filter = boost::bloom::filter<
-  boost::uuids::uuid, 1, boost::bloom::multiblock<boost::uint64_t, 8> >;
+  boost::uuids::uuid, 1, boost::bloom::multiblock<std::uint64_t, 8> >;

 static constexpr std::size_t num_elements = 10000;

@@ -54,19 +54,19 @@ void save_filter(const filter& f, const char* filename)
 {
  std::ofstream out(filename, std::ios::binary | std::ios::trunc);
  std::size_t c=f.capacity();
-  out.write((const char*) &c, sizeof(c)); /* save capacity (bits) */
+  out.write(reinterpret_cast<const char*>(&c), sizeof(c)); /* save capacity (bits) */
  auto s = f.array();
-  out.write((const char*) s.data(), s.size()); /* save array */
+  out.write(reinterpret_cast<const char*>(s.data()), s.size()); /* save array */
 }

 filter load_filter(const char* filename)
 {
  std::ifstream in(filename, std::ios::binary);
  std::size_t c;
-  in.read((char*) &c, sizeof(c));
+  in.read(reinterpret_cast<char*>(&c), sizeof(c));
  filter f(c);
  auto s = f.array();
-  in.read((char*) s.data(), s.size()); /* load array */
+  in.read(reinterpret_cast<char*>(s.data()), s.size()); /* load array */
  return f;
 }

--- a/extra/boost_bloom.natvis
+++ b/extra/boost_bloom.natvis
@@ -12,12 +12,12 @@ See https://www.boost.org/libs/bloom for library home page.
 <Type Name="boost::bloom::filter&lt;*&gt;" Inheritable="false">
 	<Intrinsic Name="core" Expression="*static_cast&lt;super*&gt;(this)" />
 	<Intrinsic Name="has_array" Expression="core().ar.data!=nullptr" />
-	<Intrinsic Name="data" Expression="has_array()?core().ar.buckets:nullptr" />
+	<Intrinsic Name="data" Expression="has_array()?core().ar.array:nullptr" />
 	<Intrinsic
 		Name="array_size" 
 		Expression="
 			has_array()?
-				core().hs.rng*super::bucket_size+(super::used_value_size-super::bucket_size):
+				core().hs.rng*super::stride+(super::used_value_size-super::stride):
 				0"
 	/>
 	<Intrinsic Name="capacity" Expression="array_size()*8" />
--- a/include/boost/bloom.hpp
+++ b/include/boost/bloom.hpp
@@ -0,0 +1,18 @@
+/* Copyright 2025 Joaquin M Lopez Munoz.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ *
+ * See https://www.boost.org/libs/bloom for library home page.
+ */
+
+#ifndef BOOST_BLOOM_HPP
+#define BOOST_BLOOM_HPP
+
+#include <boost/bloom/filter.hpp>
+#include <boost/bloom/block.hpp>
+#include <boost/bloom/multiblock.hpp>
+#include <boost/bloom/fast_multiblock32.hpp>
+#include <boost/bloom/fast_multiblock64.hpp>
+
+#endif
--- a/include/boost/bloom/block.hpp
+++ b/include/boost/bloom/block.hpp
@@ -10,36 +10,61 @@
 #define BOOST_BLOOM_BLOCK_HPP

 #include <boost/bloom/detail/block_base.hpp>
+#include <boost/bloom/detail/block_ops.hpp>
 #include <boost/bloom/detail/block_fpr_base.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{

 template<typename Block,std::size_t K>
 struct block:
-  private detail::block_base<Block,K>,public detail::block_fpr_base<K>
+  public detail::block_fpr_base<K>,
+  private detail::block_base<Block,K>
 {
  static constexpr std::size_t k=K;
  using value_type=Block;

-  static inline void mark(value_type& x,boost::uint64_t hash)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline void mark(value_type& x,std::uint64_t hash)
  {
-    loop(hash,[&](boost::uint64_t h){x|=Block(1)<<(h&mask);});
+    loop(hash,[&](std::uint64_t h){block_ops::set(x,h&mask);});
  }

-  static inline bool check(const value_type& x,boost::uint64_t hash)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline bool check(const value_type& x,std::uint64_t hash)
  {
-    Block fp=0;
-    mark(fp,hash);
-    return (x&fp)==fp;
+    return check(x,hash,typename block_ops::is_extended_block{});
  }

 private:
  using super=detail::block_base<Block,K>;
  using super::mask;
  using super::loop;
+  using super::loop_while;
+  using block_ops=detail::block_ops<Block>;
+
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline bool check(
+    const value_type& x,std::uint64_t hash,
+    std::false_type /* non-extended block */)
+  {
+    Block fp;
+    block_ops::zero(fp);
+    mark(fp,hash);
+    return block_ops::testc(x,fp);
+  }
+
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline bool check(
+    const value_type& x,std::uint64_t hash,
+    std::true_type /* extended block */)
+  {
+    return loop_while(hash,[&](std::uint64_t h){
+      return block_ops::get_at_lsb(x,h&mask)&1;
+    });
+  }
 };

 } /* namespace bloom */
--- a/include/boost/bloom/detail/block_base.hpp
+++ b/include/boost/bloom/detail/block_base.hpp
@@ -12,8 +12,9 @@
 #include <boost/config.hpp>
 #include <boost/bloom/detail/constexpr_bit_width.hpp>
 #include <boost/bloom/detail/mulx64.hpp>
-#include <boost/cstdint.hpp>
+#include <boost/bloom/detail/type_traits.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{
@@ -24,23 +25,31 @@ namespace detail{
 #pragma warning(disable:4714) /* marked as __forceinline not inlined */
 #endif

-// TODO: describe
+/* Validates type Block and provides common looping facilities for block
+ * and multiblock.
+ */

 template<typename Block,std::size_t K>
 struct block_base
 {
-  static constexpr std::size_t k=K;
-  static constexpr std::size_t hash_width=sizeof(boost::uint64_t)*CHAR_BIT;
-  static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
  static_assert(
-    (block_width&(block_width-1))==0,
-    "Block's size in bits must be a power of two");
+    is_unsigned_integral_or_extended_unsigned_integral<Block>::value||
+    (
+      is_array_of<
+        Block,is_unsigned_integral_or_extended_unsigned_integral>::value&&
+      is_power_of_two<array_size<Block>::value>::value
+    ),
+    "Block must be an (extended) unsigned integral type or an array T[N] "
+    "with T an (extended) unsigned integral type and N a power of two");
+  static constexpr std::size_t k=K;
+  static constexpr std::size_t hash_width=sizeof(std::uint64_t)*CHAR_BIT;
+  static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
  static constexpr std::size_t mask=block_width-1;
  static constexpr std::size_t shift=constexpr_bit_width(mask);
  static constexpr std::size_t rehash_k=(hash_width-shift)/shift;

  template<typename F>
-  static BOOST_FORCEINLINE void loop(boost::uint64_t hash,F f)
+  static BOOST_FORCEINLINE void loop(std::uint64_t hash,F f)
  {
    for(std::size_t i=0;i<k/rehash_k;++i){
      auto h=hash;
@@ -56,6 +65,25 @@ struct block_base
      f(h);
    }
  }
+
+  template<typename F>
+  static BOOST_FORCEINLINE bool loop_while(std::uint64_t hash,F f)
+  {
+    for(std::size_t i=0;i<k/rehash_k;++i){
+      auto h=hash;
+      for(std::size_t j=0;j<rehash_k;++j){
+        h>>=shift;
+        if(!f(h))return false;
+      }
+      hash=detail::mulx64(hash);
+    }
+    auto h=hash;
+    for(std::size_t i=0;i<k%rehash_k;++i){
+      h>>=shift;
+      if(!f(h))return false;
+    }
+    return true;
+  }
 };

 #if defined(BOOST_MSVC)
--- a/include/boost/bloom/detail/block_ops.hpp
+++ b/include/boost/bloom/detail/block_ops.hpp
@@ -0,0 +1,95 @@
+/* Copyright 2025 Joaquin M Lopez Munoz.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ *
+ * See https://www.boost.org/libs/bloom for library home page.
+ */
+
+#ifndef BOOST_BLOOM_DETAIL_BLOCK_OPS_HPP
+#define BOOST_BLOOM_DETAIL_BLOCK_OPS_HPP
+
+#include <boost/config.hpp>
+#include <cstdint>
+#include <type_traits>
+
+namespace boost{
+namespace bloom{
+namespace detail{
+
+#if defined(BOOST_MSVC)
+#pragma warning(push)
+#pragma warning(disable:4714) /* marked as __forceinline not inlined */
+#endif
+
+template<typename Block>
+struct block_ops
+{
+  using is_extended_block=std::false_type;
+  using value_type=Block;
+
+  static BOOST_FORCEINLINE void zero(Block& x)
+  {
+    x=0;
+  }
+
+  static BOOST_FORCEINLINE void set(value_type& x,std::uint64_t n)
+  {
+    x|=Block(1)<<n;
+  }
+
+  static BOOST_FORCEINLINE int get_at_lsb(const value_type& x,std::uint64_t n)
+  {
+    return static_cast<int>(x>>n);
+  }
+
+  static BOOST_FORCEINLINE void reduce(
+    int& res,const value_type& x,std::uint64_t n)
+  {
+    res&=get_at_lsb(x,n);
+  }
+
+  static BOOST_FORCEINLINE bool testc(const value_type& x,const value_type& y)
+  {
+    return (x&y)==y;
+  }
+};
+
+template<typename Block,std::size_t N>
+struct block_ops<Block[N]>
+{
+  using is_extended_block=std::true_type;
+  using value_type=Block[N];
+
+  static BOOST_FORCEINLINE void zero(value_type& x)
+  {
+    for(std::size_t i=0;i<N;++i)x[i]=0;
+  }
+
+  static BOOST_FORCEINLINE void set(value_type& x,std::uint64_t n)
+  {
+    x[n%N]|=Block(1)<<(n/N);
+  }
+
+  static BOOST_FORCEINLINE int get_at_lsb(const value_type& x,std::uint64_t n)
+  {
+    return static_cast<int>(x[n%N]>>(n/N));
+  }
+
+  static BOOST_FORCEINLINE void reduce(
+    int& res,const value_type& x,std::uint64_t n)
+  {
+    res&=get_at_lsb(x,n);
+  }
+};
+
+#if defined(BOOST_MSVC)
+#pragma warning(pop) /* C4714 */
+#endif
+
+
+} /* namespace detail */
+} /* namespace bloom */
+} /* namespace boost */
+
+#endif
--- a/include/boost/bloom/detail/constexpr_bit_width.hpp
+++ b/include/boost/bloom/detail/constexpr_bit_width.hpp
@@ -17,7 +17,7 @@ namespace detail{

 /* boost::core::bit_width is not always C++11 constexpr */

-inline constexpr std::size_t constexpr_bit_width(std::size_t x) 
+constexpr std::size_t constexpr_bit_width(std::size_t x) 
 {
  return x?1+constexpr_bit_width(x>>1):0;
 }
--- a/include/boost/bloom/detail/core.hpp
+++ b/include/boost/bloom/detail/core.hpp
@@ -19,9 +19,10 @@
 #include <boost/core/allocator_traits.hpp>
 #include <boost/core/empty_value.hpp>
 #include <boost/core/span.hpp>
-#include <boost/cstdint.hpp>
 #include <boost/throw_exception.hpp>
+#include <climits>
 #include <cmath>
+#include <cstdint>
 #include <cstring>
 #include <limits>
 #include <memory>
@@ -60,45 +61,46 @@ namespace detail{
 #pragma warning(disable:4714) /* marked as __forceinline not inlined */
 #endif

-/*  mcg_and_fastrange produces (pos,hash') from hash, where
- *   - m=mulx64(hash,range), mulx64 denotes extended multiplication
- *   - pos=high(m)
- *   - hash'=low(m)
- *  pos is uniformly distributed in [0,range) (see
- *  https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
- *  congruential generator of the form hash'<-hash*rng mod 2^64. This MCG
- *  generates long cycles when the initial value of hash is odd and
- *  rng = +-3 (mod 8), which is why we adjust hash and rng as seen below. As a
- *  result, the low bits of hash' are of poor quality, and the least
- *  significant bit in particular is always one.
+/* fastrange_and_mcg produces (pos,hash') from hash as follows:
+ *   - pos=high(mulx64(hash,range))
+ *   - hash'=c*hash
+ * pos is uniformly distributed in [0,range) (see Lemire 2018
+ * https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
+ * congruential generator using well-behaved multipliers c from Steele and
+ * Vigna 2021 https://arxiv.org/pdf/2001.05304 . To ensure the MCG generates
+ * long cycles the initial value of hash is adjusted to be odd, which implies
+ * that the least significant bit of hash' is always one. In general, the low bits
+ * of MCG-produced values are of low quality and we don't use them downstream.
 */

-struct mcg_and_fastrange
+struct fastrange_and_mcg
 {
-  constexpr mcg_and_fastrange(std::size_t m)noexcept:
-    rng{
-      m+(
-        (m%8<=3)?3-(m%8):
-        (m%8<=5)?5-(m%8):
-                 8-(m%8)+3)
-    }
-    {}
+  constexpr fastrange_and_mcg(std::size_t m)noexcept:rng{m}{}

+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
  inline constexpr std::size_t range()const noexcept{return (std::size_t)rng;}

-  inline void prepare_hash(boost::uint64_t& hash)const noexcept
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  inline void prepare_hash(std::uint64_t& hash)const noexcept
  {
    hash|=1u;
  }

-  inline std::size_t next_position(boost::uint64_t& hash)const noexcept
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  inline std::size_t next_position(std::uint64_t& hash)const noexcept
  {
    boost::uint64_t hi;
-    hash=umul128(hash,rng,hi);
+    umul128(hash,rng,hi);
+
+#if ((((SIZE_MAX>>16)>>16)>>16)>>15)!=0 /* 64-bit mode (or higher) */
+    hash*=0xf1357aea2e62a9c5ull;
+#else /* 32-bit mode */
+    hash*=0xe817fb2d;
+#endif
    return (std::size_t)hi;
  }

-  boost::uint64_t rng;
+  std::uint64_t rng;
 };

 /* used_value_size<Subfilter>::value is Subfilter::used_value_size if it
@@ -124,7 +126,7 @@ struct used_value_size<

 /* GCD with x,p > 1, p a power of two */

-inline constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
+constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
 {
  /* x&-x: maximum power of two dividing x */
  return (x&(0-x))<p?(x&(0-x)):p;
@@ -132,7 +134,7 @@ inline constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)

 /* std::ldexp is not constexpr in C++11 */

-inline constexpr double constexpr_ldexp_1_positive(int exp)
+constexpr double constexpr_ldexp_1_positive(int exp)
 {
  return exp==0?1.0:2.0*constexpr_ldexp_1_positive(exp-1);
 }
@@ -140,7 +142,7 @@ inline constexpr double constexpr_ldexp_1_positive(int exp)
 struct filter_array
 {
  unsigned char* data;
-  unsigned char* buckets; /* adjusted from data for proper alignment */
+  unsigned char* array; /* adjusted from data for proper alignment */
 };

 struct if_constexpr_void_else{void operator()()const{}};
@@ -170,7 +172,7 @@ template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr>
 void swap_if(T&,T&){}

 template<
-  std::size_t K,typename Subfilter,std::size_t BucketSize,typename Allocator
+  std::size_t K,typename Subfilter,std::size_t Stride,typename Allocator
 >
 class filter_core:empty_value<Allocator,0>
 {
@@ -192,23 +194,22 @@ private:
    detail::used_value_size<subfilter>::value;

 public:
-  static constexpr std::size_t bucket_size=
-    BucketSize?BucketSize:used_value_size;
+  static constexpr std::size_t stride=Stride?Stride:used_value_size;
  static_assert(
-    bucket_size<=used_value_size,"BucketSize can't exceed the block size");
+    stride<=used_value_size,"Stride can't exceed the block size");

 private:
-  static constexpr std::size_t tail_size=sizeof(block_type)-bucket_size;
+  static constexpr std::size_t tail_size=sizeof(block_type)-stride;
  static constexpr bool are_blocks_aligned=
-    (bucket_size%alignof(block_type)==0);
+    (stride%alignof(block_type)==0);
  static constexpr std::size_t cacheline=64; /* unknown at compile time */
  static constexpr std::size_t initial_alignment=
    are_blocks_aligned?
      alignof(block_type)>cacheline?alignof(block_type):cacheline:
      1;
  static constexpr std::size_t prefetched_cachelines=
-    1+(block_size+cacheline-1-gcd_pow2(bucket_size,cacheline))/cacheline;
-  using hash_strategy=detail::mcg_and_fastrange;
+    1+(block_size+cacheline-1-gcd_pow2(stride,cacheline))/cacheline;
+  using hash_strategy=detail::fastrange_and_mcg;

 public:
  using allocator_type=Allocator;
@@ -362,15 +363,15 @@ public:

  boost::span<unsigned char> array()noexcept
  {
-    return {ar.data?ar.buckets:nullptr,capacity()/CHAR_BIT};
+    return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
  }

  boost::span<const unsigned char> array()const noexcept
  {
-    return {ar.data?ar.buckets:nullptr,capacity()/CHAR_BIT};
+    return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
  }

-  BOOST_FORCEINLINE void insert(boost::uint64_t hash)
+  BOOST_FORCEINLINE void insert(std::uint64_t hash)
  {
    hs.prepare_hash(hash);
    for(auto n=k;n--;){
@@ -438,7 +439,7 @@ public:
    return *this;
  }

-  BOOST_FORCEINLINE bool may_contain(boost::uint64_t hash)const
+  BOOST_FORCEINLINE bool may_contain(std::uint64_t hash)const
  {
    hs.prepare_hash(hash);
 #if 1
@@ -464,7 +465,7 @@ public:
  {
    if(x.range()!=y.range())return false;
    else if(!x.ar.data)return true;
-    else return std::memcmp(x.ar.buckets,y.ar.buckets,x.used_array_size())==0;
+    else return std::memcmp(x.ar.array,y.ar.array,x.used_array_size())==0;
  }

 private:
@@ -475,25 +476,25 @@ private:

  static std::size_t requested_range(std::size_t m)
  {
-    if(m>(used_value_size-bucket_size)*CHAR_BIT){
+    if(m>(used_value_size-stride)*CHAR_BIT){
      /* ensures filter_core{f.capacity()}.capacity()==f.capacity() */
-      m-=(used_value_size-bucket_size)*CHAR_BIT;
+      m-=(used_value_size-stride)*CHAR_BIT;
    }
    return
-      (std::numeric_limits<std::size_t>::max)()-m>=bucket_size*CHAR_BIT-1?
-      (m+bucket_size*CHAR_BIT-1)/(bucket_size*CHAR_BIT):
-      m/(bucket_size*CHAR_BIT);
+      (std::numeric_limits<std::size_t>::max)()-m>=stride*CHAR_BIT-1?
+      (m+stride*CHAR_BIT-1)/(stride*CHAR_BIT):
+      m/(stride*CHAR_BIT);
  }

  static filter_array new_array(allocator_type& al,std::size_t rng)
  {
    if(rng){
      auto p=allocator_allocate(al,space_for(rng));
-      return {p,buckets_for(p)};
+      return {p,array_for(p)};
    }
    else{
      /* To avoid dynamic allocation for zero capacity or moved-from filters,
-       * we point buckets to a statically allocated dummy array with all bits
+       * we point array to a statically allocated dummy array with all bits
       * set to one. This is good for read operations but not so for write
       * operations, where we need to resort to a null check on
       * filter_array::data.
@@ -502,7 +503,7 @@ private:
      static struct {unsigned char x=-1;}
      dummy[space_for(hash_strategy{0}.range())];

-      return {nullptr,buckets_for(reinterpret_cast<unsigned char*>(&dummy))};
+      return {nullptr,array_for(reinterpret_cast<unsigned char*>(&dummy))};
    }
  }

@@ -513,13 +514,13 @@ private:

  void clear_bytes()noexcept
  {
-    std::memset(ar.buckets,0,used_array_size());
+    std::memset(ar.array,0,used_array_size());
  }

  void copy_bytes(const filter_core& x)
  {
    BOOST_ASSERT(range()==x.range());
-    std::memcpy(ar.buckets,x.ar.buckets,used_array_size());
+    std::memcpy(ar.array,x.ar.array,used_array_size());
  }

  std::size_t range()const noexcept
@@ -529,14 +530,14 @@ private:

  static constexpr std::size_t space_for(std::size_t rng)noexcept
  {
-    return (initial_alignment-1)+rng*bucket_size+tail_size;
+    return (initial_alignment-1)+rng*stride+tail_size;
  }

-  static unsigned char* buckets_for(unsigned char* p)noexcept
+  static unsigned char* array_for(unsigned char* p)noexcept
  {
    return p+
-      (boost::uintptr_t(initial_alignment)-
-       boost::uintptr_t(p))%initial_alignment;
+      (std::uintptr_t(initial_alignment)-
+       std::uintptr_t(p))%initial_alignment;
  }

  std::size_t used_array_size()const noexcept
@@ -546,7 +547,7 @@ private:

  static std::size_t used_array_size(std::size_t rng)noexcept
  {
-    return rng?rng*bucket_size+(used_value_size-bucket_size):0;
+    return rng?rng*stride+(used_value_size-stride):0;
  }

  static std::size_t unadjusted_capacity_for(std::size_t n,double fpr)
@@ -609,7 +610,7 @@ private:

  static double fpr_for_c(double c)
  {
-    constexpr std::size_t w=(2*used_value_size-bucket_size)*CHAR_BIT;
+    constexpr std::size_t w=(2*used_value_size-stride)*CHAR_BIT;
    const double          lambda=w*k/c;
    const double          loglambda=std::log(lambda);
    double                res=0.0;
@@ -639,20 +640,20 @@ private:
      std::pow(1.0-std::exp(-(double)k_total/c),(double)k_total));
  }

-  BOOST_FORCEINLINE bool get(const unsigned char* p,boost::uint64_t hash)const
+  BOOST_FORCEINLINE bool get(const unsigned char* p,std::uint64_t hash)const
  {
    return get(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
  }

  BOOST_FORCEINLINE bool get(
-    const unsigned char* p,boost::uint64_t hash,
+    const unsigned char* p,std::uint64_t hash,
    std::true_type /* blocks aligned */)const
  {
    return subfilter::check(*reinterpret_cast<const block_type*>(p),hash);
  }

  BOOST_FORCEINLINE bool get(
-    const unsigned char* p,boost::uint64_t hash,
+    const unsigned char* p,std::uint64_t hash,
    std::false_type /* blocks not aligned */)const
  {
    block_type x;
@@ -660,20 +661,20 @@ private:
    return subfilter::check(x,hash);
  }

-  BOOST_FORCEINLINE void set(unsigned char* p,boost::uint64_t hash)
+  BOOST_FORCEINLINE void set(unsigned char* p,std::uint64_t hash)
  {
    return set(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
  }

  BOOST_FORCEINLINE void set(
-    unsigned char* p,boost::uint64_t hash,
+    unsigned char* p,std::uint64_t hash,
    std::true_type /* blocks aligned */)
  {
    subfilter::mark(*reinterpret_cast<block_type*>(p),hash);
  }

  BOOST_FORCEINLINE void set(
-    unsigned char* p,boost::uint64_t hash,
+    unsigned char* p,std::uint64_t hash,
    std::false_type /* blocks not aligned */)
  {
    block_type x;
@@ -683,9 +684,9 @@ private:
  }

  BOOST_FORCEINLINE 
-  unsigned char* next_element(boost::uint64_t& h)noexcept
+  unsigned char* next_element(std::uint64_t& h)noexcept
  {
-    auto p=ar.buckets+hs.next_position(h)*bucket_size;
+    auto p=ar.array+hs.next_position(h)*stride;
    for(std::size_t i=0;i<prefetched_cachelines;++i){
      BOOST_BLOOM_PREFETCH_WRITE((unsigned char*)p+i*cacheline);
    }
@@ -693,9 +694,9 @@ private:
  }

  BOOST_FORCEINLINE
-  const unsigned char* next_element(boost::uint64_t& h)const noexcept
+  const unsigned char* next_element(std::uint64_t& h)const noexcept
  {
-    auto p=ar.buckets+hs.next_position(h)*bucket_size;
+    auto p=ar.array+hs.next_position(h)*stride;
    for(std::size_t i=0;i<prefetched_cachelines;++i){
      BOOST_BLOOM_PREFETCH((unsigned char*)p+i*cacheline);
    }
@@ -708,9 +709,9 @@ private:
    if(range()!=x.range()){
      BOOST_THROW_EXCEPTION(std::invalid_argument("incompatible filters"));
    }
-    auto first0=ar.buckets,
+    auto first0=ar.array,
         last0=first0+used_array_size(),
-         first1=x.ar.buckets;
+         first1=x.ar.array;
    while(first0!=last0)f(*first0++,*first1++);
  }

--- a/include/boost/bloom/detail/fast_multiblock32_avx2.hpp
+++ b/include/boost/bloom/detail/fast_multiblock32_avx2.hpp
@@ -13,8 +13,8 @@
 #include <boost/bloom/detail/multiblock_fpr_base.hpp>
 #include <boost/bloom/detail/mulx64.hpp>
 #include <boost/config.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{
@@ -29,9 +29,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
 {
  static constexpr std::size_t k=K;
  using value_type=__m256i[(k+7)/8];
-  static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
+  static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;

-  static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      mark_m256i(x[i],hash,8);
@@ -42,7 +42,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
    }
  }

-  static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      if(!check_m256i(x[i],hash,8))return false;
@@ -56,7 +56,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>

 private:
  static BOOST_FORCEINLINE __m256i make_m256i(
-    boost::uint64_t hash,std::size_t kp)
+    std::uint64_t hash,std::size_t kp)
  {
    const __m256i ones[8]={
      _mm256_set_epi32(0,0,0,0,0,0,0,1),
@@ -76,14 +76,14 @@ private:
  }

  static BOOST_FORCEINLINE void mark_m256i(
-    __m256i& x,boost::uint64_t hash,std::size_t kp)
+    __m256i& x,std::uint64_t hash,std::size_t kp)
  {
    __m256i h=make_m256i(hash,kp);
    x=_mm256_or_si256(x,h);
  }

  static BOOST_FORCEINLINE bool check_m256i(
-    const __m256i& x,boost::uint64_t hash,std::size_t kp)
+    const __m256i& x,std::uint64_t hash,std::size_t kp)
  {
    __m256i h=make_m256i(hash,kp);
    return _mm256_testc_si256(x,h);
--- a/include/boost/bloom/detail/fast_multiblock32_neon.hpp
+++ b/include/boost/bloom/detail/fast_multiblock32_neon.hpp
@@ -13,8 +13,8 @@
 #include <boost/bloom/detail/mulx64.hpp>
 #include <boost/bloom/detail/neon.hpp>
 #include <boost/config.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{
@@ -28,11 +28,11 @@ namespace bloom{

 #ifdef _MSC_VER
 #define BOOST_BLOOM_INIT_U32X4(w,x,y,z)            \
-{(boost::uint32_t(w)+(unsigned long long(x)<<32)), \
- (boost::uint32_t(y)+(unsigned long long(z)<<32))}
+{(std::uint32_t(w)+(unsigned long long(x)<<32)), \
+ (std::uint32_t(y)+(unsigned long long(z)<<32))}
 #else
 #define BOOST_BLOOM_INIT_U32X4(w,x,y,z) \
-{boost::uint32_t(w),boost::uint32_t(x),boost::uint32_t(y),boost::uint32_t(z)}
+{std::uint32_t(w),std::uint32_t(x),std::uint32_t(y),std::uint32_t(z)}
 #endif

 #define BOOST_BLOOM_INIT_U32X4X2(w0,x0,y0,z0,w1,x1,y1,z1) \
@@ -43,9 +43,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
 {
  static constexpr std::size_t k=K;
  using value_type=uint32x4x2_t[(k+7)/8];
-  static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
+  static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;

-  static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      mark_uint32x4x2_t(x[i],hash,8);
@@ -56,7 +56,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
    }
  }

-  static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      if(!check_uint32x4x2_t(x[i],hash,8))return false;
@@ -70,7 +70,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>

 private:
  static BOOST_FORCEINLINE uint32x4x2_t make_uint32x4x2_t(
-    boost::uint64_t hash,std::size_t kp)
+    std::uint64_t hash,std::size_t kp)
  {
    static const uint32x4x2_t ones[8]={
      BOOST_BLOOM_INIT_U32X4X2(1,0,0,0,0,0,0,0),
@@ -101,7 +101,7 @@ private:
  }

  static BOOST_FORCEINLINE void mark_uint32x4x2_t(
-    uint32x4x2_t& x,boost::uint64_t hash,std::size_t kp)
+    uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
  {
    uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
    x.val[0]=vorrq_u32(x.val[0],h.val[0]);
@@ -109,7 +109,7 @@ private:
  }

  static BOOST_FORCEINLINE bool check_uint32x4x2_t(
-    const uint32x4x2_t& x,boost::uint64_t hash,std::size_t kp)
+    const uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
  {
    uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
    uint32x4_t   lo=vtstq_u32(x.val[0],h.val[0]);
--- a/include/boost/bloom/detail/fast_multiblock32_sse2.hpp
+++ b/include/boost/bloom/detail/fast_multiblock32_sse2.hpp
@@ -13,8 +13,8 @@
 #include <boost/bloom/detail/mulx64.hpp>
 #include <boost/bloom/detail/sse2.hpp>
 #include <boost/config.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 #ifdef __SSE4_1__
 #include <smmintrin.h>
@@ -35,6 +35,7 @@ struct m128ix2
  __m128i lo,hi;
 };

+/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
 static inline int mm_testc_si128(__m128i x,__m128i y)
 {
 #ifdef __SSE4_1__
@@ -51,9 +52,9 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
 {
  static constexpr std::size_t k=K;
  using value_type=detail::m128ix2[(k+7)/8];
-  static constexpr std::size_t used_value_size=sizeof(boost::uint32_t)*k;
+  static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;

-  static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      mark_m128ix2(x[i],hash,8);
@@ -64,7 +65,7 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>
    }
  }

-  static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  {
    for(std::size_t i=0;i<k/8;++i){
      if(!check_m128ix2(x[i],hash,8))return false;
@@ -78,10 +79,10 @@ struct fast_multiblock32:detail::multiblock_fpr_base<K>

 private:
  static BOOST_FORCEINLINE detail::m128ix2 make_m128ix2(
-    boost::uint64_t hash,std::size_t kp)
+    std::uint64_t hash,std::size_t kp)
  {
-    const boost::uint32_t mask=boost::uint32_t(31)<<23,
-                          exp=boost::uint32_t(127)<<23;
+    const std::uint32_t mask=std::uint32_t(31)<<23,
+                          exp=std::uint32_t(127)<<23;
    const __m128i exps[4]={
      _mm_set_epi32( 0 , 0 , 0 ,exp),
      _mm_set_epi32( 0 , 0 ,exp,exp),
@@ -113,7 +114,7 @@ private:
  }

  static BOOST_FORCEINLINE void mark_m128ix2(
-    detail::m128ix2& x,boost::uint64_t hash,std::size_t kp)
+    detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
  {
    detail::m128ix2 h=make_m128ix2(hash,kp);
    x.lo=_mm_or_si128(x.lo,h.lo);
@@ -121,7 +122,7 @@ private:
  }

  static BOOST_FORCEINLINE bool check_m128ix2(
-    const detail::m128ix2& x,boost::uint64_t hash,std::size_t kp)
+    const detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
  {
    detail::m128ix2 h=make_m128ix2(hash,kp);
    auto res=detail::mm_testc_si128(x.lo,h.lo);
--- a/include/boost/bloom/detail/fast_multiblock64_avx2.hpp
+++ b/include/boost/bloom/detail/fast_multiblock64_avx2.hpp
@@ -13,8 +13,8 @@
 #include <boost/bloom/detail/multiblock_fpr_base.hpp>
 #include <boost/bloom/detail/mulx64.hpp>
 #include <boost/config.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{
@@ -38,9 +38,9 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>
 {
  static constexpr std::size_t k=K;
  using value_type=detail::m256ix2[(k+7)/8];
-  static constexpr std::size_t used_value_size=sizeof(boost::uint64_t)*k;
+  static constexpr std::size_t used_value_size=sizeof(std::uint64_t)*k;

-  static BOOST_FORCEINLINE void mark(value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  {
    for(int i=0;i<k/8;++i){
      mark_m256ix2(x[i],hash,8);
@@ -51,7 +51,7 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>
    }
  }

-  static BOOST_FORCEINLINE bool check(const value_type& x,boost::uint64_t hash)
+  static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  {
    for(int i=0;i<k/8;++i){
      if(!check_m256ix2(x[i],hash,8))return false;
@@ -65,7 +65,7 @@ struct fast_multiblock64:detail::multiblock_fpr_base<K>

 private:
  static BOOST_FORCEINLINE detail::m256ix2 make_m256ix2(
-    boost::uint64_t hash,std::size_t kp)
+    std::uint64_t hash,std::size_t kp)
  {
    const detail::m256ix2 ones[8]={
      {_mm256_set_epi64x(0,0,0,1),_mm256_set_epi64x(0,0,0,0)},
@@ -92,7 +92,7 @@ private:
  }

  static BOOST_FORCEINLINE void mark_m256ix2(
-    detail::m256ix2& x,boost::uint64_t hash,std::size_t kp)
+    detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
  {
    detail::m256ix2 h=make_m256ix2(hash,kp);
    x.lo=_mm256_or_si256(x.lo,h.lo);
@@ -100,7 +100,7 @@ private:
  }

  static BOOST_FORCEINLINE bool check_m256ix2(
-    const detail::m256ix2& x,boost::uint64_t hash,std::size_t kp)
+    const detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
  {
    detail::m256ix2 h=make_m256ix2(hash,kp);
    auto res=_mm256_testc_si256(x.lo,h.lo);
--- a/include/boost/bloom/detail/mulx64.hpp
+++ b/include/boost/bloom/detail/mulx64.hpp
@@ -10,9 +10,9 @@
 #ifndef BOOST_BLOOM_DETAIL_MULX64_HPP
 #define BOOST_BLOOM_DETAIL_MULX64_HPP

-#include <boost/cstdint.hpp>
 #include <climits>
 #include <cstddef>
+#include <cstdint>

 #if defined(_MSC_VER)&&!defined(__clang__)
 #include <intrin.h>
@@ -24,16 +24,16 @@ namespace detail{

 #if defined(_MSC_VER)&&defined(_M_X64)&&!defined(__clang__)

-__forceinline boost::uint64_t umul128(
-  boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
+__forceinline std::uint64_t umul128(
+  std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
 {
  return _umul128(x,y,&hi);
 }

 #elif defined(_MSC_VER)&&defined(_M_ARM64)&&!defined(__clang__)

-__forceinline boost::uint64_t umul128(
-  boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
+__forceinline std::uint64_t umul128(
+  std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
 {
  hi=__umulh(x,y);
  return x*y;
@@ -41,40 +41,42 @@ __forceinline boost::uint64_t umul128(

 #elif defined(__SIZEOF_INT128__)

-inline boost::uint64_t umul128(
-  boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
+/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+inline std::uint64_t umul128(
+  std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
 {
  __uint128_t r=(__uint128_t)x*y;
-  hi=(boost::uint64_t)(r>>64);
-  return (boost::uint64_t)r;
+  hi=(std::uint64_t)(r>>64);
+  return (std::uint64_t)r;
 }

 #else

-inline boost::uint64_t umul128(
-  boost::uint64_t x,boost::uint64_t y,boost::uint64_t& hi)
+/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+inline std::uint64_t umul128(
+  std::uint64_t x,std::uint64_t y,std::uint64_t& hi)
 {
-  boost::uint64_t x1=(boost::uint32_t)x;
-  boost::uint64_t x2=x >> 32;
+  std::uint64_t x1=(std::uint32_t)x;
+  std::uint64_t x2=x >> 32;

-  boost::uint64_t y1=(boost::uint32_t)y;
-  boost::uint64_t y2=y >> 32;
+  std::uint64_t y1=(std::uint32_t)y;
+  std::uint64_t y2=y >> 32;

-  boost::uint64_t r3=x2*y2;
+  std::uint64_t r3=x2*y2;

-  boost::uint64_t r2a=x1*y2;
+  std::uint64_t r2a=x1*y2;

  r3+=r2a>>32;

-  boost::uint64_t r2b=x2*y1;
+  std::uint64_t r2b=x2*y1;

  r3+=r2b>>32;

-  boost::uint64_t r1=x1*y1;
+  std::uint64_t r1=x1*y1;

-  boost::uint64_t r2=(r1>>32)+(boost::uint32_t)r2a+(boost::uint32_t)r2b;
+  std::uint64_t r2=(r1>>32)+(std::uint32_t)r2a+(std::uint32_t)r2b;

-  r1=(r2<<32)+(boost::uint32_t)r1;
+  r1=(r2<<32)+(std::uint32_t)r1;
  r3+=r2>>32;

  hi=r3;
@@ -83,11 +85,12 @@ inline boost::uint64_t umul128(

 #endif

-inline boost::uint64_t mulx64(boost::uint64_t x)noexcept
+/* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+inline std::uint64_t mulx64(std::uint64_t x)noexcept
 {
  /* multiplier is 2^64/phi */
-  boost::uint64_t hi;
-  boost::uint64_t lo=umul128(x,0x9E3779B97F4A7C15ull,hi);
+  std::uint64_t hi;
+  std::uint64_t lo=umul128(x,0x9E3779B97F4A7C15ull,hi);
  return hi^lo;
 }

--- a/include/boost/bloom/detail/type_traits.hpp
+++ b/include/boost/bloom/detail/type_traits.hpp
@@ -11,7 +11,9 @@
 #ifndef BOOST_BLOOM_DETAIL_TYPE_TRAITS_HPP
 #define BOOST_BLOOM_DETAIL_TYPE_TRAITS_HPP

+#include <boost/config.hpp>
 #include <boost/type_traits/make_void.hpp>
+#include <cstddef>
 #include <type_traits>
 #include <utility>

@@ -86,6 +88,54 @@ template<typename T,class Q=void>
 using enable_if_transparent_t=
  typename std::enable_if<is_transparent<T>::value,Q>::type;

+template<typename T>
+struct is_integral_or_extended_integral:std::is_integral<T>{};
+template<typename T>
+struct is_unsigned_or_extended_unsigned:std::is_unsigned<T>{};
+
+#if defined(__SIZEOF_INT128__)
+
+#if defined(BOOST_GCC)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+
+template<>
+struct is_integral_or_extended_integral<__int128>:std::true_type{};
+template<>
+struct is_integral_or_extended_integral<unsigned __int128>:std::true_type{};
+template<>
+struct is_unsigned_or_extended_unsigned<unsigned __int128>:std::true_type{};
+
+#if defined(BOOST_GCC)
+#pragma GCC diagnostic pop
+#endif
+
+#endif
+
+template<typename T>
+struct is_unsigned_integral_or_extended_unsigned_integral:
+  std::integral_constant<
+    bool,
+    is_integral_or_extended_integral<T>::value&&
+    is_unsigned_or_extended_unsigned<T>::value
+  >
+{};
+
+template<typename T,template <typename...> class Trait>
+struct is_array_of:std::false_type{};
+
+template<typename T,std::size_t N,template <typename...> class Trait>
+struct is_array_of<T[N],Trait>:Trait<T>{};
+
+template<typename T> struct array_size:
+  std::integral_constant<std::size_t,0>{};
+template<typename T,std::size_t N> struct array_size<T[N]>:
+  std::integral_constant<std::size_t,N>{};
+
+template<std::size_t N>
+struct is_power_of_two:std::integral_constant<bool,(N!=0)&&((N&(N-1))==0)>{};
+
 } /* namespace detail */
 } /* namespace bloom */
 } /* namespace boost */
--- a/include/boost/bloom/fast_multiblock32.hpp
+++ b/include/boost/bloom/fast_multiblock32.hpp
@@ -21,14 +21,14 @@
 #include <boost/bloom/detail/fast_multiblock32_neon.hpp>
 #else /* fallback */
 #include <boost/bloom/multiblock.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{

 template<std::size_t K>
-using fast_multiblock32=multiblock<boost::uint32_t,K>;
+using fast_multiblock32=multiblock<std::uint32_t,K>;

 } /* namespace bloom */
 } /* namespace boost */
--- a/include/boost/bloom/fast_multiblock64.hpp
+++ b/include/boost/bloom/fast_multiblock64.hpp
@@ -15,14 +15,14 @@
 #include <boost/bloom/detail/fast_multiblock64_avx2.hpp>
 #else /* fallback */
 #include <boost/bloom/multiblock.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{

 template<std::size_t K>
-using fast_multiblock64=multiblock<boost::uint64_t,K>;
+using fast_multiblock64=multiblock<std::uint64_t,K>;

 } /* namespace bloom */
 } /* namespace boost */
--- a/include/boost/bloom/filter.hpp
+++ b/include/boost/bloom/filter.hpp
@@ -17,10 +17,10 @@
 #include <boost/bloom/detail/type_traits.hpp>
 #include <boost/config.hpp>
 #include <boost/container_hash/hash.hpp>
+#include <boost/container_hash/hash_is_avalanching.hpp>
 #include <boost/core/allocator_traits.hpp>
 #include <boost/core/empty_value.hpp>
-#include <boost/cstdint.hpp>
-#include <boost/unordered/hash_traits.hpp> // TODO: internalize?
+#include <cstdint>
 #include <initializer_list>
 #include <memory>
 #include <type_traits>
@@ -37,57 +37,29 @@ namespace detail{
 * filter mixes hash results with mulx64 if the hash is not marked as
 * avalanching, i.e. it's not of good quality (see
- * <boost/unordered/hash_traits.hpp>), or if std::size_t is less than 64 bits
- * (mixing policies promote to boost::uint64_t).
+ * <boost/container_hash/hash_is_avalanching.hpp>), or if std::size_t is less
+ * than 64 bits (mixing policies promote to std::uint64_t).
 */

 struct no_mix_policy
 {
  template<typename Hash,typename T>
-  static inline boost::uint64_t mix(const Hash& h,const T& x)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline std::uint64_t mix(const Hash& h,const T& x)
  {
-    return (boost::uint64_t)h(x);
+    return (std::uint64_t)h(x);
  }
 };

 struct mulx64_mix_policy
 {
  template<typename Hash,typename T>
-  static inline boost::uint64_t mix(const Hash& h,const T& x)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline std::uint64_t mix(const Hash& h,const T& x)
  {
-    return mulx64((boost::uint64_t)h(x));
+    return mulx64((std::uint64_t)h(x));
  }
 };

-template<typename Allocator,typename T>
-class allocator_constructed
-{
-public:
-  template<typename...Args>
-  allocator_constructed(const Allocator& al_,Args&&... args):al{al_}
-  {
-    allocator_construct(al,std::addressof(u.x),std::forward<Args>(args)...);
-  }
-
-  ~allocator_constructed()
-  {
-    allocator_destroy(al,std::addressof(u.x));
-  }
-
-  const T& value()const noexcept{return u.x;}
-
-private:
-  union uninitialized_value
-  {
-    uninitialized_value(){}
-    ~uninitialized_value(){}
-
-    T x;
-  };
-  
-  uninitialized_value u;
-  Allocator           al;
-};
-
 } /* namespace detail */

 #if defined(BOOST_MSVC)
@@ -97,8 +69,8 @@ private:

 template<
  typename T,std::size_t K,
-  typename Subfilter=block<unsigned char,1>,std::size_t BucketSize=0,
-  typename Hash=boost::hash<T>,typename Allocator=std::allocator<T>
+  typename Subfilter=block<unsigned char,1>,std::size_t Stride=0,
+  typename Hash=boost::hash<T>,typename Allocator=std::allocator<unsigned char>
 >
 class

@@ -108,20 +80,18 @@ __declspec(empty_bases) /* activate EBO with multiple inheritance */

 filter:
  detail::filter_core<
-    K,Subfilter,BucketSize,allocator_rebind_t<Allocator,unsigned char>
+    K,Subfilter,Stride,allocator_rebind_t<Allocator,unsigned char>
  >,
  empty_value<Hash,0>
 {
  BOOST_BLOOM_STATIC_ASSERT_IS_CV_UNQUALIFIED_OBJECT(T);
  static_assert(
-    std::is_same<T,allocator_value_type_t<Allocator>>::value,
-    "Allocator's value_type must be T");
-  using super=detail::filter_core<
-    K,Subfilter,BucketSize,allocator_rebind_t<Allocator,unsigned char>
-  >;
+    std::is_same<unsigned char,allocator_value_type_t<Allocator>>::value,
+    "Allocator's value_type must be unsigned char");
+  using super=detail::filter_core<K,Subfilter,Stride,Allocator>;
  using mix_policy=typename std::conditional<
-    unordered::hash_is_avalanching<Hash>::value&&
-    sizeof(std::size_t)>=sizeof(boost::uint64_t),
+    boost::hash_is_avalanching<Hash>::value&&
+    sizeof(std::size_t)>=sizeof(std::uint64_t),
    detail::no_mix_policy,
    detail::mulx64_mix_policy
  >::type;
@@ -130,7 +100,7 @@ public:
  using value_type=T;
  using super::k;
  using subfilter=typename super::subfilter;
-  using super::bucket_size;
+  using super::stride;
  using hasher=Hash;
  using allocator_type=Allocator;
  using size_type=typename super::size_type;
@@ -258,23 +228,6 @@ public:
  using super::fpr_for;
  using super::array;

-  template<typename... Args>
-  BOOST_FORCEINLINE void emplace(Args&&... args)
-  {
-    insert(detail::allocator_constructed<allocator_type,value_type>{
-      get_allocator(),std::forward<Args>(args)...}.value());
-  }
-
-  template<
-    typename U,
-    typename std::enable_if<
-      std::is_same<T,detail::remove_cvref_t<U>>::value>::type* =nullptr
-  >
-  BOOST_FORCEINLINE void emplace(U&& x)
-  {
-    insert(x); /* avoid value_type construction */
-  }
-
  BOOST_FORCEINLINE void insert(const T& x)
  {
    super::insert(hash_for(x));
@@ -292,7 +245,7 @@ public:
  template<typename InputIterator>
  void insert(InputIterator first,InputIterator last)
  {
-    while(first!=last)emplace(*first++);
+    while(first!=last)insert(*first++);
  }

  void insert(std::initializer_list<value_type> il)
@@ -346,10 +299,10 @@ public:

 private:
  template<
-    typename T1,std::size_t K1,typename S,std::size_t B,typename H,typename A
+    typename T1,std::size_t K1,typename SF,std::size_t S,typename H,typename A
  >
  bool friend operator==(
-    const filter<T1,K1,S,B,H,A>& x,const filter<T1,K1,S,B,H,A>& y);
+    const filter<T1,K1,SF,S,H,A>& x,const filter<T1,K1,SF,S,H,A>& y);

  using hash_base=empty_value<Hash,0>;

@@ -357,33 +310,34 @@ private:
  Hash& h(){return hash_base::get();}

  template<typename U>
-  inline boost::uint64_t hash_for(const U& x)const
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  inline std::uint64_t hash_for(const U& x)const
  {
    return mix_policy::mix(h(),x);
  }
 };

 template<
-  typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
+  typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
 >
-bool operator==(const filter<T,K,S,B,H,A>& x,const filter<T,K,S,B,H,A>& y)
+bool operator==(const filter<T,K,SF,S,H,A>& x,const filter<T,K,SF,S,H,A>& y)
 {
-  using super=typename filter<T,K,S,B,H,A>::super;
+  using super=typename filter<T,K,SF,S,H,A>::super;
  return static_cast<const super&>(x)==static_cast<const super&>(y);
 }

 template<
-  typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
+  typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
 >
-bool operator!=(const filter<T,K,S,B,H,A>& x,const filter<T,K,S,B,H,A>& y)
+bool operator!=(const filter<T,K,SF,S,H,A>& x,const filter<T,K,SF,S,H,A>& y)
 {
  return !(x==y);
 }

 template<
-  typename T,std::size_t K,typename S,std::size_t B,typename H,typename A
+  typename T,std::size_t K,typename SF,std::size_t S,typename H,typename A
 >
-void swap(filter<T,K,S,B,H,A>& x,filter<T,K,S,B,H,A>& y)
+void swap(filter<T,K,SF,S,H,A>& x,filter<T,K,SF,S,H,A>& y)
  noexcept(noexcept(x.swap(y)))
 {
  x.swap(y);
--- a/include/boost/bloom/multiblock.hpp
+++ b/include/boost/bloom/multiblock.hpp
@@ -10,31 +10,35 @@
 #define BOOST_BLOOM_MULTIBLOCK_HPP

 #include <boost/bloom/detail/block_base.hpp>
+#include <boost/bloom/detail/block_ops.hpp>
 #include <boost/bloom/detail/multiblock_fpr_base.hpp>
-#include <boost/cstdint.hpp>
 #include <cstddef>
+#include <cstdint>

 namespace boost{
 namespace bloom{

 template<typename Block,std::size_t K>
 struct multiblock:
-  private detail::block_base<Block,K>,public detail::multiblock_fpr_base<K>
+  public detail::multiblock_fpr_base<K>,
+  private detail::block_base<Block,K>
 {
  static constexpr std::size_t k=K;
  using value_type=Block[k];

-  static inline void mark(value_type& x,boost::uint64_t hash)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline void mark(value_type& x,std::uint64_t hash)
  {
    std::size_t i=0;
-    loop(hash,[&](boost::uint64_t h){x[i++]|=Block(1)<<(h&mask);});
+    loop(hash,[&](std::uint64_t h){block_ops::set(x[i++],h&mask);});
  }

-  static inline bool check(const value_type& x,boost::uint64_t hash)
+  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
+  static inline bool check(const value_type& x,std::uint64_t hash)
  {
-    Block res=1;
+    int res=1;
    std::size_t i=0;
-    loop(hash,[&](boost::uint64_t h){res&=(x[i++]>>(h&mask));});
+    loop(hash,[&](std::uint64_t h){block_ops::reduce(res,x[i++],h&mask);});
    return res;
  }

@@ -42,6 +46,7 @@ private:
  using super=detail::block_base<Block,K>;
  using super::mask;
  using super::loop;
+  using block_ops=detail::block_ops<Block>;
 };

 } /* namespace bloom */
--- a/index.html
+++ b/index.html
@@ -8,12 +8,12 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<meta http-equiv="refresh" content="0; URL=doc/index.html">
+<meta http-equiv="refresh" content="0; URL=doc/html/bloom.html">
 <title>Boost.Bloom Documentation</title>
 </head>

 <body>
 Automatic redirection failed, please go to
-<a href="doc/index.html">doc/index.html</a>
+<a href="doc/html/bloom.html">doc/html/bloom.html</a>
 </body>
 </html>
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Copyright 2018, 2019, 2021, 2022 Peter Dimov
+# Copyright 2025 Joaquin M Lopez Muñoz
+# Distributed under the Boost Software License, Version 1.0.
+# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt
+
+include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST) # OPTIONAL: a missing module is not an error; the result variable records whether it was found
+
+if(HAVE_BOOST_TEST) # only register tests when the include above located the module
+
+boost_test_jamfile(FILE Jamfile.v2
+  LINK_LIBRARIES Boost::bloom Boost::core Boost::mp11) # NOTE(review): presumably turns the Jamfile's run rules into CMake tests; confirm against BoostTestJamfile
+
+endif()
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -18,12 +18,11 @@ project
      <toolset>msvc:<cxxflags>-D_SCL_SECURE_NO_WARNINGS
    ;

-test-suite "bloom" :
-    [ run test_array.cpp        ]
-    [ run test_capacity.cpp     ]
-    [ run test_combination.cpp  ]
-    [ run test_comparison.cpp   ]
-    [ run test_construction.cpp ]
-    [ run test_fpr.cpp          ]
-    [ run test_insertion.cpp    ]
-    ;
+run test_array.cpp ;
+run test_boost_bloom_hpp.cpp ;
+run test_capacity.cpp ;
+run test_combination.cpp ;
+run test_comparison.cpp ;
+run test_construction.cpp ;
+run test_fpr.cpp ;
+run test_insertion.cpp ;
--- a/test/test_boost_bloom_hpp.cpp
+++ b/test/test_boost_bloom_hpp.cpp
@@ -0,0 +1,25 @@
+/* Copyright 2025 Joaquin M Lopez Munoz.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ *
+ * See https://www.boost.org/libs/bloom for library home page.
+ */
+
+#include <boost/bloom.hpp>
+#include <boost/core/lightweight_test.hpp>
+
+struct use_types /* names the filter and each subfilter template; all must be declared via <boost/bloom.hpp> */
+{
+  using type1=boost::bloom::filter<int,1>;
+  using type2=boost::bloom::block<unsigned char,1>;
+  using type3=boost::bloom::multiblock<unsigned char,1>;
+  using type4=boost::bloom::fast_multiblock32<1>;
+  using type5=boost::bloom::fast_multiblock64<1>;
+};
+
+int main() /* compile-only check: no BOOST_TEST assertion is made here */
+{
+  (void)use_types{}; /* creates a use_types temporary; the void cast discards it */
+  return boost::report_errors(); /* exit status comes from lightweight_test's error count */
+}
--- a/test/test_capacity.cpp
+++ b/test/test_capacity.cpp
@@ -43,9 +43,7 @@ struct counting_allocator
 template<typename Filter,typename ValueFactory>
 void test_capacity()
 {
-  using filter=realloc_filter<
-    Filter,counting_allocator<typename Filter::value_type>
-  >;
+  using filter=realloc_filter<Filter,counting_allocator<unsigned char>>;

  ValueFactory fac;

--- a/test/test_construction.cpp
+++ b/test/test_construction.cpp
@@ -88,7 +88,7 @@ void test_pocxx()
  static constexpr auto always_equal=AlwaysEqual::value;
  using filter=realloc_filter<
    rehash_filter<Filter,stateful<typename Filter::hasher>>,
-    stateful_allocator<typename Filter::value_type,Propagate,AlwaysEqual>
+    stateful_allocator<unsigned char,Propagate,AlwaysEqual>
  >;
  using value_type=typename filter::value_type;
  using hasher=typename filter::hasher;
@@ -170,7 +170,7 @@ void test_construction()
 {
  using filter=realloc_filter<
    rehash_filter<Filter,stateful<typename Filter::hasher>>,
-    stateful_allocator<typename Filter::value_type>
+    stateful_allocator<unsigned char>
  >;
  using value_type=typename filter::value_type;
  using hasher=typename filter::hasher;
@@ -443,65 +443,6 @@ void test_construction()
  }
 }

-struct allocator_only_constructible
-{
-  allocator_only_constructible()=delete;
-  ~allocator_only_constructible()=delete;
-
-  int n;
-};
-
-struct allocator_only_constructible_hash
-{
-  using is_transparent=void;
-
-  std::size_t operator()(const allocator_only_constructible& x)const
-  {
-    return (*this)(x.n);
-  }
-
-  std::size_t operator()(int n)const
-  {
-    return boost::hash<int>{}(n);
-  }
-};
-
-template<typename T>
-struct constructing_allocator
-{
-  using value_type=T;
-
-  constructing_allocator()=default;
-  template<typename U>
-  constructing_allocator(const constructing_allocator<U>&){}
-
-  T* allocate(std::size_t n)
-  {
-    return static_cast<T*>(::operator new(n*sizeof(T)));
-  }
-
-  void deallocate(T* p,std::size_t){::operator delete(p);}
-
-  void construct(allocator_only_constructible* p,int n){p->n=n;}
-  void destroy(allocator_only_constructible* p){}
-
-  bool operator==(const constructing_allocator& x)const{return true;}
-  bool operator!=(const constructing_allocator& x)const{return false;}
-};
-
-void test_allocator_aware_construction()
-{
-  using value_type=allocator_only_constructible;
-  using filter=boost::bloom::filter<
-    value_type,5,boost::bloom::block<unsigned char,1>,0,
-    allocator_only_constructible_hash,constructing_allocator<value_type>
-  >;
-
-  filter f(1000);
-  f.emplace(42);
-  BOOST_TEST(f.may_contain(42));
-}
-
 struct lambda
 {
  template<typename T>
@@ -517,6 +458,5 @@ struct lambda
 int main()
 {
  boost::mp11::mp_for_each<identity_test_types>(lambda{});
-  test_allocator_aware_construction();
  return boost::report_errors();
 }
--- a/test/test_fpr.cpp
+++ b/test/test_fpr.cpp
@@ -55,10 +55,7 @@ void test_fpr()
 {
  using filter=rehash_filter<
    revalue_filter<
-      realloc_filter<
-        Filter,
-        throwing_allocator<typename Filter::value_type>
-      >,
+      realloc_filter<Filter,throwing_allocator<unsigned char>>,
      std::string
    >,
    boost::hash<std::string>
--- a/test/test_insertion.cpp
+++ b/test/test_insertion.cpp
@@ -50,21 +50,6 @@ void test_insertion()
  filter       f(10000);
  ValueFactory fac;

-  {
-    auto x=fac();
-    f.emplace(x,0,"hello",3.1416);
-    BOOST_TEST(f.may_contain(value_type{x,1}));
-  }
-  {
-    auto x=fac();
-    f.emplace(value_type{x,0,"boost"}); /* must avoid value_type move ctor */
-    BOOST_TEST(f.may_contain(value_type{x,1}));
-  }
-  {
-    value_type x{fac(),0,"boost"};
-    f.emplace(x); /* same with copy ctor */
-    BOOST_TEST(f.may_contain(x));
-  }
  {
    value_type x{fac(),0};
    f.insert(const_cast<value_type&>(x));
--- a/test/test_types.hpp
+++ b/test/test_types.hpp
@@ -14,10 +14,10 @@
 #include <boost/bloom/fast_multiblock64.hpp>
 #include <boost/bloom/filter.hpp>
 #include <boost/bloom/multiblock.hpp>
-#include <boost/cstdint.hpp>
 #include <boost/mp11/algorithm.hpp>
 #include <boost/mp11/list.hpp>
 #include <boost/mp11/utility.hpp>
+#include <cstdint>
 #include <string>

 using test_types=boost::mp11::mp_list<
@@ -25,10 +25,16 @@ using test_types=boost::mp11::mp_list<
    int,2
  >,
  boost::bloom::filter<
-   std::string,1,boost::bloom::block<boost::uint16_t,3>,1
+   std::string,1,boost::bloom::block<std::uint16_t,3>,1
  >,
  boost::bloom::filter<
-    std::size_t,1,boost::bloom::multiblock<boost::uint64_t,3>
+   int,1,boost::bloom::block<std::uint32_t[4],4>
+  >,
+  boost::bloom::filter<
+    std::size_t,1,boost::bloom::multiblock<std::uint64_t,3>
+  >,
+  boost::bloom::filter<
+    std::size_t,1,boost::bloom::multiblock<unsigned char[4],3>,1
  >,
  boost::bloom::filter<
    unsigned char,1,boost::bloom::fast_multiblock32<5>,2
--- a/test/test_utilities.hpp
+++ b/test/test_utilities.hpp
@@ -10,7 +10,6 @@
 #define BOOST_BLOOM_TEST_TEST_UTILITIES_HPP

 #include <boost/bloom/filter.hpp>
-#include <boost/core/allocator_traits.hpp>
 #include <limits>
 #include <new>
 #include <string>
@@ -44,7 +43,7 @@ template<
 >
 struct revalue_filter_impl<boost::bloom::filter<T,K,S,B,H,A>,U>
 {
-  using type=boost::bloom::filter<U,K,S,B,H,boost::allocator_rebind_t<A,U>>;
+  using type=boost::bloom::filter<U,K,S,B,H,A>;
 };

 template<typename Filter,typename U>