added documentation

* removed unneeded explicit * fixed boundary results for capacity_for and fpr_for * renamed used_block_size to used_value_size * added reset(n,n) * added initial documentation draft * static asserted assumption on Block size * synced up naming in comment with that of docs * added implementation notes * editorial * expanded tables * removed unneeded explicit * fixed boundary results for capacity_for and fpr_for * renamed used_block_size to used_value_size * added reset(n,n) * added initial documentation draft * static asserted assumption on Block size * synced up naming in comment with that of docs * added implementation notes * editorial * added benchmarks * editorial * added configuration section * editorial * s/multiinsertion/multi-insertion * added section on use cases * editorial
2026-01-19 04:02:11 +00:00 · 2025-04-02 20:42:19 +02:00
parent e326a1f3cb
commit 916c845c41
37 changed files with 3087 additions and 19 deletions
--- a/doc/Jamfile.v2
+++ b/doc/Jamfile.v2
@@ -0,0 +1,23 @@
+# Copyright 2025 Joaquín M López Muñoz.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+#
+# See http://www.boost.org/libs/bloom for library home page.
+
+import asciidoctor ;
+
+html bloom.html : bloom.adoc ;
+
+install html_ : bloom.html : <location>html ;
+
+pdf bloom.pdf : bloom.adoc ;
+explicit bloom.pdf ;
+
+install pdf_ : bloom.pdf : <location>bloom ;
+explicit pdf_ ;
+
+alias boostdoc ;
+explicit boostdoc ;
+alias boostrelease : html_ ;
+explicit boostrelease ;
--- a/doc/bloom.adoc
+++ b/doc/bloom.adoc
@@ -0,0 +1,41 @@
+= Boost.Bloom
+:toc: left
+:toclevels: 3
+:idprefix:
+:docinfo: private-footer
+:source-highlighter: rouge
+:source-language: c++
+:nofooter:
+:sectlinks:
+:leveloffset: +1
+:imagesdir: ../img
+:stem: latexmath
+:small: pass:[<font style="font-size:90%">]
+:small-end: pass:[</font>]
+
++++
+<style>
+.imageblock > .title {
+  text-align: inherit;
+}
+
+.indent {
+  padding-left: 2rem;
+}
+
+.bordered_table th, .bordered_table td {
+  border: 1px solid lightgray;
+}
+</style>
++++
+
+include::bloom/intro.adoc[]
+include::bloom/primer.adoc[]
+include::bloom/tutorial.adoc[]
+include::bloom/configuration.adoc[]
+include::bloom/benchmarks.adoc[]
+include::bloom/reference.adoc[]
+include::bloom/fpr_estimation.adoc[]
+include::bloom/implementation_notes.adoc[]
+include::bloom/release_notes.adoc[]
+include::bloom/copyright.adoc[]
--- a/doc/bloom/benchmarks.adoc
+++ b/doc/bloom/benchmarks.adoc
--- a/doc/bloom/configuration.adoc
+++ b/doc/bloom/configuration.adoc
@@ -0,0 +1,99 @@
+[#configuration]
+= Choosing a Filter Configuration
+
+:idprefix: configuration_
+
+Boost.Bloom offers a plethora of compile-time and run-time configuration options,
+so it may be difficult to make a choice.
+If you're aiming for a given FPR or have a particular capacity in mind and
+you'd like to choose the most appropriate filter type, the following chart
+may come in handy.
+
+image::fpr_c.png[align=center, title="FPR vs. _c_ for different filter types."]
+
+The chart plots FPR vs. _c_ (capacity / number of elements inserted) for several
+`boost::bloom::filter`+++s+++ where `K` has been set to its optimum value (minimum FPR)
+as shown in the table below.
+
+++
+<table class="bordered_table" style="text-align: center;">
+    <tr>
+        <th rowspan="2"></th>
+		<th colspan="21"><i>c</i> = capacity / number of elements inserted</th>
+	</tr>
+    <tr>
+        <th>4</th> <th>5</th> <th>6</th> <th>7</th> <th>8</th> <th>9</th> <th>10</th> <th>11</th> <th>12</th> <th>13</th>
+        <th>14</th> <th>15</th> <th>16</th> <th>17</th> <th>18</th> <th>19</th> <th>20</th> <th>21</th> <th>22</th> <th>23</th> <th>24</th>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
+        <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint32_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
+        <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint64_t,K&gt;&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
+        <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,block&lt;uint64_t,K&gt;,1&gt;</code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
+        <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>9</td> <td>9</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint32_t,K&gt;&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
+        <td>9</td> <td>9</td> <td>9</td> <td>10</td> <td>13</td> <td>13</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>16</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint32_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
+        <td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>12</td> <td>12</td> <td>14</td> <td>14</td> <td>14</td> <td>14</td> <td>15</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint64_t,K&gt;&gt;</code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
+        <td>10</td> <td>10</td> <td>12</td> <td>13</td> <td>14</td> <td>15</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>17</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;1,multiblock&lt;uint64_t,K&gt;,1&gt;</code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
+        <td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>15</td> <td>16</td> <td>16</td>
+    </tr>
+    <tr>
+        <td style="text-align: left;"><code>filter&lt;K&gt;</code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
+        <td>10</td> <td>11</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>14</td> <td>16</td> <td>16</td> <td>16</td> <td>17</td>
+    </tr>
+</table>
+++
+
+Let's see how this can be used by way of an example. Suppose we plan to insert 10M elements
+and want to keep the FPR at 10^-4^. The chart gives us five possibilities:
+
+* `filter<K>` -> _c_ &cong; 19 bits per element
+* `filter<1, multiblock<uint64_t, K>, 1>` -> _c_ &cong; 20 bits per element
+* `filter<1, multiblock<uint64_t, K>>` -> _c_ &cong; 21 bits per element
+* `filter<1, multiblock<uint32_t, K>, 1>` -> _c_ &cong; 21.5 bits per element
+* `filter<1, multiblock<uint32_t, K>>` -> _c_ &cong; 23 bits per element
+
+These options have different tradeoffs in terms of space used and performance. If
+we choose `filter<1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
+`filter<1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
+value of `K` in the table for _c_ = 21 or 22, and we get our final configuration:
+
+[listing,subs="+macros,+quotes"]
+-----
+using my_filter=filter<std::string, 1, fast_multiblock32<**14**>, 1>;
+-----
+
+The resulting filter can be constructed in any of the following ways:
+
+[listing,subs="+macros,+quotes"]
+-----
+// 1) calculate the capacity from the value of c we got from the chart
+my_filter pass:[f((]std::size_t)(10'000'000 * 21.5));
+
+// 2) let the library calculate the capacity from n and target fpr
+// expect some deviation from the capacity in 1)
+my_filter f(10'000'000, 1E-4);
+
+// 3) equivalent to 2)
+my_filter f(my_filter::capacity_for(10'000'000, 1E-4));
+-----
--- a/doc/bloom/copyright.adoc
+++ b/doc/bloom/copyright.adoc
@@ -0,0 +1,10 @@
+[#copyright]
+= Copyright and License
+
+:idprefix: copyright_
+
+Of this documentation:
+
+* Copyright &copy; 2025 Joaqu&iacute;n M L&oacute;pez Mu&ntilde;oz
+
+Distributed under the http://www.boost.org/LICENSE_1_0.txt[Boost Software License, Version 1.0^].
--- a/doc/bloom/fpr_estimation.adoc
+++ b/doc/bloom/fpr_estimation.adoc
@@ -0,0 +1,74 @@
+[#fpr_estimation]
+= Appendix A: FPR Estimation
+
+:idprefix: fpr_estimation_
+
+For a classical Bloom filter, the theoretical false positive rate, under some simplifying assumptions,
+is given by
+
+[.text-center]
+{small}stem:[\text{FPR}(n,m,k)=\left(1 - \left(1 - \displaystyle\frac{1}{m}\right)^{kn}\right)^k \approx \left(1 - e^{-kn/m}\right)^k]{small-end} for large {small}stem:[m]{small-end},
+
+where {small}stem:[n]{small-end} is the number of elements inserted in the filter, {small}stem:[m]{small-end} its capacity in bits and {small}stem:[k]{small-end} the
+number of bits set per insertion (see a https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives[derivation^]
+of this formula). For a given inverse load factor {small}stem:[c=m/n]{small-end}, the optimum {small}stem:[k]{small-end} is
+the integer closest to:
+
+[.text-center]
+{small}stem:[k_{\text{opt}}=c\cdot\ln2,]{small-end}
+
+yielding a minimum attainable FPR of {small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
+
+In the case of a filter of the form `boost::bloom::filter<T, K, block<Block, K'>>`, we can extend
+the approach from https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f376ff09a64b388bfcde2f5353e9ddb44033aac8[Putze et al.^]
+to derive the (approximate but very precise) formula:
+
+[.text-center]
+{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbk/m) \cdot \text{FPR}(i,b,k')\right)^{k},]{small-end}
+
+where
+
+[.text-center]
+{small}stem:[\text{Pois}(i,\lambda)=\displaystyle\frac{\lambda^i e^{-\lambda}}{i!}]{small-end}
+
+is the probability mass function of a https://en.wikipedia.org/wiki/Poisson_distribution[Poisson distribution^]
+with mean {small}stem:[\lambda]{small-end}, and {small}stem:[b]{small-end} is the size of `Block` in bits. If we're using `multiblock<Block,K'>`, we have
+
+[.text-center]
+{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbkk'/m) \cdot \text{FPR}(i,b,1)^{k'}\right)^{k}.]{small-end}
+
+As we have commented xref:primer_multiblock_filters[before], in general 
+
+[.text-center]
+{small}stem:[\text{FPR}_\text{block}(n,m,b,k,k') \geq \text{FPR}_\text{multiblock}(n,m,b,k,k') \geq \text{FPR}(n,m,kk'),]{small-end}
+
+that is, block and multiblock filters have worse FPR than the classical filter for the same number of bits
+set per insertion, but they will be faster. We have the particular case
+
+[.text-center]
+{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,1)=\text{FPR}_{\text{multiblock}}(n,m,b,k,1)=\text{FPR}(n,m,k),]{small-end}
+
+which follows simply from the observation that using `{block|multiblock}<Block, 1>` behaves exactly as
+a classical Bloom filter.
+
+We don't know of any closed, simple formula for the FPR of block and multiblock filters when
+`BucketSize` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
+that is, when subfilter subarrays overlap.
+We can use the following approximations ({small}stem:[s]{small-end} = `BucketSize` in bits):
+
+[.text-center]
+{small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2b-s)k}{m}\right) \cdot \text{FPR}(i,2b-s,k')\right)^{k},]{small-end} +
+{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2bk'-s)k}{m}\right) \cdot \text{FPR}\left(i,\frac{2bk'-s}{k'},1\right)^{k'}\right)^{k},]{small-end}
+
+where the replacement of {small}stem:[b]{small-end} with {small}stem:[2b-s]{small-end} 
+(or {small}stem:[bk']{small-end} with {small}stem:[2bk'-s]{small-end} for multiblock filters) accounts
+for the fact that the window of hashing positions affecting a particular bit spreads due to
+overlapping. Note that the formulas reduce to the non-overlapping case when {small}stem:[s]{small-end} takes its
+default value (stem:[b] for block, stem:[bk'] for multiblock). These approximations are acceptable for
+low values of {small}stem:[k']{small-end} but tend to underestimate the actual FPR as {small}stem:[k']{small-end} grows.
+In general, the use of overlapping improves (decreases) FPR by a factor ranging from
+0.6 to 0.9 for typical filter configurations.
+
+{small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')]{small-end} and {small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')]{small-end}
+are the formulas used by the implementation of
+`xref:filter_fpr_estimation[boost::bloom::filter::fpr_for]`.
--- a/doc/bloom/implementation_notes.adoc
+++ b/doc/bloom/implementation_notes.adoc
@@ -0,0 +1,130 @@
+[#implementation_notes]
+= Appendix B: Implementation Notes
+
+:idprefix: implementation_notes_
+
+== Hash Mixing
+
+This is the bit-mixing post-process we use to improve the statistical properties
+of the hash function when it doesn't have the avalanching property:
+
+[.text-center]
+{small}stem:[m\leftarrow\text{mulx}(h,C)]{small-end}, +
+{small}stem:[h'\leftarrow\text{high}(m)\text{ xor }\text{low}(m)]{small-end},
+
+where {small}stem:[\text{mulx}]{small-end} denotes 128-bit multiplication of two 64-bit factors,
+{small}stem:[\text{high}(m)]{small-end} and {small}stem:[\text{low}(m)]{small-end}
+are the high and low 64-bit words of {small}stem:[m]{small-end}, respectively,
+{small}stem:[C=\lfloor 2^{64}/\varphi \rfloor]{small-end} and
+{small}stem:[\varphi]{small-end} is the https://en.wikipedia.org/wiki/Golden_ratio[golden ratio^].
+
+== 32-bit mode
+
+Internally, we always use 64-bit hash values even if in 32-bit mode, where
+the user-provided hash function produces 32-bit outputs. To expand
+a 32-bit hash value to 64 bits, we use the same mixing procedure
+described
+xref:implementation_notes_hash_mixing[above].
+
+== Dispensing with Multiple Hash Functions
+
+Direct implementations of a Bloom filter with {small}stem:[k]{small-end}
+bits per operation require {small}stem:[k]{small-end} different and independent
+hash functions {small}stem:[h_i(x)]{small-end}, which incurs an important
+performance penalty, particularly if the objects are expensive to hash
+(e.g. strings). https://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf[Kirsch and Mitzenmacher^]
+show how to relax this requirement down to two different hash functions
+{small}stem:[h_1(x)]{small-end} and {small}stem:[h_2(x)]{small-end} linearly
+combined as
+
+[.text-center]
+{small}stem:[g_i(x)=h_1(x)+ih_2(x).]{small-end}
+
+Without formal justification, we have relaxed this even further to just one
+initial hash value {small}stem:[h_0=h_0(x)]{small-end}, where new values
+{small}stem:[h_i]{small-end} are computed from  {small}stem:[h_{i-1}]{small-end}
+by means of very cheap mixing schemes. In what follows
+{small}stem:[k]{small-end}, {small}stem:[k']{small-end} are the homonym values
+in a filter of the form `boost::bloom::filter<T, K, {block|multiblock}<Block, K'>>`,
+{small}stem:[b]{small-end} is `sizeof(Block) * CHAR_BIT`,
+and {small}stem:[r]{small-end} is the number of buckets in the filter.
+
+=== Bucket Location
+
+To produce a location (i.e. a number {small}stem:[p]{small-end} in {small}stem:[[0,r)]{small-end}) from
+{small}stem:[h_{i-1}]{small-end}, instead of the straightforward but costly
+procedure {small}stem:[p\leftarrow h_{i-1}\bmod r]{small-end} we resort to
+Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]. Moreover,
+we combine this calculation with the production of {small}stem:[h_{i}]{small-end}
+from {small}stem:[h_{i-1}]{small-end} as follows:
+
+[.text-center]
+{small}stem:[m\leftarrow\text{mulx}(h_{i-1},r),]{small-end} +
+{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m),]{small-end} +
+{small}stem:[h_i\leftarrow m \bmod 2^{64}=\text{low}(m).]{small-end}
+
+The transformation {small}stem:[h_{i-1} \rightarrow h_i]{small-end} is
+a simple https://en.wikipedia.org/wiki/Linear_congruential_generator[multiplicative congruential generator^]
+over {small}stem:[2^{64}]{small-end}. For this MCG to produce long
+cycles, {small}stem:[h_0]{small-end} must be odd and the multiplicative constant
+{small}stem:[r]{small-end} must be {small}stem:[\equiv \pm 3 \text{ (mod 8)}]{small-end}:
+to meet these requirements, the implementation adjusts {small}stem:[h_0]{small-end}
+to {small}stem:[h_0']{small-end} and {small}stem:[r]{small-end}
+to {small}stem:[r']{small-end}. This renders the least significant bit
+of {small}stem:[h_i]{small-end} unsuitable for pseudorandomization
+(it is always one).
+
+=== Bit selection
+
+Inside a subfilter, we must produce {small}stem:[k']{small-end}
+values from {small}stem:[h_i]{small-end} in the range
+{small}stem:[[0,b)]{small-end} (the positions of the {small}stem:[k']{small-end}
+bits). We do this by successively taking {small}stem:[\log_2b]{small-end} bits
+from {small}stem:[h_i]{small-end} without utilizing the portion containing
+its least significant bit (which is always one as we have discussed).
+If we run out of bits (which happens when
+{small}stem:[k'> 63/\log_2b]{small-end}), we produce a new hash value
+{small}stem:[h_{i+1}]{small-end} from {small}stem:[h_{i}]{small-end}
+using the mixing procedure
+xref:implementation_notes_hash_mixing[already described].
+
+== SIMD algorithms
+
+=== `fast_multiblock32`
+
+When using AVX2, we select up to 8 bits at a time by creating
+a `+++__+++m256i` of 32-bit values {small}stem:[(x_0,x_1,...,x_7)]{small-end}
+where each {small}stem:[x_i]{small-end} is constructed from
+a different 5-bit portion of the hash value, and calculating from this
+the `+++__+++m256i` {small}stem:[(2^{x_0},2^{x_1},...,2^{x_7})]{small-end}
+with https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/mm256-sllv-epi32-64.html[`+++_+++mm256_sllv_epi32`^].
+If more bits are needed, we generate a new hash value as
+xref:implementation_notes_hash_mixing[described before] and repeat.
+
+For little-endian Neon, the algorithm is similar but the computations
+are carried out with two `uint32x4_t`+++s+++ in parallel as Neon does not have
+256-bit registers.
+
+In the case of SSE2, we don't have the 128-bit equivalent of
+`+++_+++mm256_sllv_epi32`, so we use the following, mildly interesting
+technique: a `+++__+++m128i` of the form
+
+[.text-center]
+{small}stem:[((x_0+127)\cdot 2^{23},(x_1+127)\cdot 2^{23},(x_2+127)\cdot 2^{23},(x_3+127)\cdot 2^{23}),]{small-end}
+
+where each {small}stem:[x_i]{small-end} is in {small}stem:[[0,32)]{small-end},
+can be `reinterpret_cast`+++ed+++ to (i.e., has the same binary representation as)
+the `+++__+++m128` (register of `float`+++s+++)
+
+[.text-center]
+{small}stem:[(2^{x_0},2^{x_1},2^{x_2},2^{x_3}),]{small-end}
+
+from which our desired `+++__+++m128i` of shifted 1s can be obtained
+with https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/conversion-intrinsics-003.html#GUID-B1CFE576-21E9-4E70-BE5E-B9B18D598C12[`+++_+++mm_cvttps_epi32`^].
+
+=== `fast_multiblock64`
+
+We only provide a SIMD implementation for AVX2 that relies on two
+parallel `+++__+++m256i`+++s+++ for the generation of up
+to 8 64-bit values with shifted 1s. For Neon and SSE2, emulation
+through 4 128-bit registers proved slower than non-SIMD `multiblock<uint64_t, K>`.
--- a/doc/bloom/intro.adoc
+++ b/doc/bloom/intro.adoc
@@ -0,0 +1,49 @@
+[#intro]
+= Introduction
+
+:idprefix: intro_
+
+Boost.Bloom provides the class template `xref:tutorial[boost::bloom::filter]`
+that can be configured to implement a classical Bloom filter as well as
+variations discussed in the literature such as block filters, multiblock filters,
+and more.
+
+[listing,subs="+macros,+quotes"]
+-----
+#include <boost/bloom/filter.hpp>
+#include <cassert>
+#include <string>
+
+int main()
+{
+  // Bloom filter of strings with 5 bits set per insertion
+  using filter = boost::bloom::filter<std::string, 5>;
+
+  // create filter with a capacity of 1'000'000 **bits**
+  filter f(1'000'000);
+
+  // insert elements (they can't be erased, Bloom filters are insert-only)
+  f.insert("hello");
+  f.insert("Boost");
+  //...
+
+  // elements inserted are always correctly checked as such
+  assert(f.may_contain("hello") == true);
+
+  // elements not inserted may incorrectly be identified as inserted with a
+  // false positive rate (FPR) which is a function of the array capacity,
+  // the number of bits set per element and generally how the boost::bloom::filter
+  // was specified
+  if(f.may_contain("bye")) { // likely false
+    //...
+  }
+}
+-----
+
+The different filter variations supported are specified at compile time
+as part of the `boost::bloom::filter` instantiation definition.
+Boost.Bloom has been implemented with a focus on performance;
+SIMD technologies such as AVX2, Neon and SSE2 can be leveraged to speed up
+operations.
+
+Boost.Bloom is a header-only library. C++11 or later required.
--- a/doc/bloom/primer.adoc
+++ b/doc/bloom/primer.adoc
@@ -0,0 +1,118 @@
+[#primer]
+= Bloom Filter Primer
+
+:idprefix: primer_
+
+A Bloom filter is a probabilistic data structure where inserted elements can be looked up
+with 100% accuracy, whereas looking up a non-inserted element may fail with
+some probability called the filter's _false positive rate_ or FPR. The tradeoff here is
+that Bloom filters occupy much less space than traditional non-probabilistic containers
+(typically, around 8-20 bits per element) for an acceptably low FPR. The greater
+the filter's _capacity_ (its size in bits), the lower the resulting FPR.
+
+One prime application of Bloom filters and similar data structures is for the prevention
+of expensive disk/network accesses when these would fail to retrieve a given piece of
+information.
+For instance, suppose we are developing a frontend for a database with access time
+10 ms and we know 50% of the requests will not succeed (the record does not exist).
+Inserting a Bloom filter with a lookup time of 200 ns and a FPR of 0.5% will reduce the
+average response time of the system from 10 ms to
+
+[.text-center]
+(10 + 0.0002) &times; 50.25% + 0.0002 &times; 49.75% &cong; 5.03 ms, 
+
+that is, we get a &times;1.99 overall speedup. If the database holds 1 billion records,
+an in-memory filter with, say, 8 bits per element will occupy 0.93 GiB,
+which is perfectly realizable.
+
+image::db_speedup.png[align=center, title="Improving DB negative access time with a Bloom filter."]
+
+In general, Bloom filters are useful to prevent/mitigate queries against large data sets
+when exact retrieval is costly and/or can't be made in main memory.
+Applications have been described in the areas of web caching,
+dictionary compression, network routing and genomics, among others.
+https://www.eecs.harvard.edu/~michaelm/postscripts/im2005b.pdf[Broder and Mitzenmacher^]
+provide a rather extensive review of use cases with a focus on networking.
+
+== Implementation
+
+The implementation of a Bloom filter consists of an array of _m_ bits, initially set to zero.
+Inserting an element _x_ reduces to selecting _k_ positions pseudorandomly (with the help
+of _k_ independent hash functions) and setting them to one.
+
+image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter, _k_ = 6."]
+
+To check if an element _y_ is in the filter, we follow the same procedure and see if
+the selected bits are all set to one. In the example figure there are two unset bits, which
+definitely indicates _y_ was not inserted in the filter.
+
+image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter."]
+
+A false positive occurs when the bits checked happen to be all set to one due to
+other, unrelated insertions. The probability of having a false positive increases as we
+add more elements to the filter, whereas for a given number _n_ of inserted elements, a filter
+with greater capacity (larger bit array) will have a lower FPR.
+The number _k_ of bits set per operation also affects the FPR, albeit in a more complicated way:
+when the array is sparsely populated, a higher value of _k_ improves (decreases) the FPR,
+as there are more chances that we hit a non-set bit; however, if _k_ is very high
+the array will have more and more bits set to one as new elements are inserted, which
+eventually will reach a point where we lose out to a filter with a lower _k_ and
+thus a smaller proportion of set bits.
+
+image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits."]
+
+For given values of _n_ and _m_, the optimum _k_ is the integer closest to
+
+[.text-center]
+{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n}]{small-end}
+
+for a minimum FPR of
+{small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{m/n}]{small-end}. See the appendix
+on xref:fpr_estimation[FPR estimation] for more details.
+
+== Variations on the Classical Filter
+
+=== Block Filters
+
+An operation on a Bloom filter involves accessing _k_ different positions in memory,
+which, for large arrays, results in _k_ CPU cache misses and affects the
+operation's performance. A variation on the classical approach called a
+_block filter_ seeks to minimize cache misses by concentrating all bit
+setting/checking in a small block of _b_ bits pseudorandomly selected from the
+entire array. If the block is small enough, it will fit in a CPU cacheline,
+thus drastically reducing the number of cache misses.
+
+image::block_insertion.png[align=center, title="Block filter."]
+
+The downside is that the resulting FPR is worse than that of a classical filter for
+the same values of _n_, _m_ and _k_. Intuitively, block filters reduce the
+uniformity of the distribution of bits in the array, which ultimately hurts their
+probabilistic performance.
+
+image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter, _m_ = 10^5^ bits."]
+
+A further variation on this idea is to have operations select _k_ blocks
+with _k'_ bits set on each. This, again, will have a worse FPR than a classical
+filter with _k&middot;k'_ bits per operation, but improves on a plain
+_k&middot;k'_ block filter.
+
+image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion."]
+
+=== Multiblock Filters
+
+_Multiblock filters_ take block filters' approach further by having
+bit setting/checking done on a sequence of consecutive blocks of size _b_,
+so that each block takes exactly one bit. This still maintains a good cache
+locality but improves FPR with respect to block filters because bits set to one
+are more spread out across the array.
+
+image::multiblock_insertion.png[align=center, title="Multiblock filter."]
+
+Multiblock filters can also be combined with multi-insertion. In general,
+for the same number of bits per operation and equal values of _n_ and _m_,
+a classical Bloom filter will have the better (lower) FPR, followed by
+multiblock filters and then block filters. Execution speed will roughly go
+in the reverse order. When considering block/multiblock filters with
+multi-insertion, the number of available configurations grows quickly and
+you will need to do some experimenting to locate your preferred point in the
+(FPR, capacity, speed) tradeoff space.
--- a/doc/bloom/reference.adoc
+++ b/doc/bloom/reference.adoc
@@ -0,0 +1,14 @@
+[#reference]
+= Reference
+
+include::reference/header_filter.adoc[]
+include::reference/filter.adoc[]
+include::reference/subfilters.adoc[]
+include::reference/header_block.adoc[]
+include::reference/block.adoc[]
+include::reference/header_multiblock.adoc[]
+include::reference/multiblock.adoc[]
+include::reference/header_fast_multiblock32.adoc[]
+include::reference/fast_multiblock32.adoc[]
+include::reference/header_fast_multiblock64.adoc[]
+include::reference/fast_multiblock64.adoc[]
--- a/doc/bloom/reference/block.adoc
+++ b/doc/bloom/reference/block.adoc
@@ -0,0 +1,42 @@
+[#block]
+== Class Template `block`
+
+:idprefix: block_
+
+`boost::bloom::block` -- A xref:subfilter[subfilter] over an integral type.
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/bloom/block.hpp>
+
+namespace boost{
+namespace bloom{
+
+template<typename Block, std::size_t K>
+struct block
+{
+  static constexpr std::size_t k = K;
+  using value_type               = Block;
+
+  // the rest of the interface is not public
+};
+} // namespace bloom
+} // namespace boost
+-----
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,4"]
+|===
+
+|`Block`
+|An unsigned integral type.
+
+|`K`
+| Number of bits set/checked per operation. Must be greater than zero.
+
+|===
--- a/doc/bloom/reference/fast_multiblock32.adoc
+++ b/doc/bloom/reference/fast_multiblock32.adoc
@@ -0,0 +1,52 @@
+[#fast_multiblock32]
+== Class Template `fast_multiblock32`
+
+:idprefix: fast_multiblock32_
+
+`boost::bloom::fast_multiblock32` -- A faster replacement of
+`xref:multiblock[multiblock]<std::uint32_t, K>`.
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/bloom/fast_multiblock32.hpp>
+
+namespace boost{
+namespace bloom{
+
+template<std::size_t K>
+struct fast_multiblock32
+{
+  static constexpr std::size_t k               = K;
+  using value_type                             = _implementation-defined_;
+
+  // might not be present
+  static constexpr std::size_t used_value_size = _implementation-defined_;
+
+  // the rest of the interface is not public
+};
+} // namespace bloom
+} // namespace boost
+-----
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,4"]
+|===
+
+|`K`
+| Number of bits set/checked per operation. Must be greater than zero.
+
+|===
+
+`fast_multiblock32<K>` is statistically equivalent to
+`xref:multiblock[multiblock]<std::uint32_t, K>`, but takes advantage
+of selected SIMD technologies, when available at compile time, to perform faster.
+Currently supported: AVX2, little-endian Neon, SSE2.
+The non-SIMD case falls back to regular `multiblock`.
+
+`xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock32<K>>` is
+`4 * K`.
--- a/doc/bloom/reference/fast_multiblock64.adoc
+++ b/doc/bloom/reference/fast_multiblock64.adoc
@@ -0,0 +1,52 @@
+[#fast_multiblock64]
+== Class Template `fast_multiblock64`
+
+:idprefix: fast_multiblock64_
+
+`boost::bloom::fast_multiblock64` -- A faster replacement of
+`xref:multiblock[multiblock]<std::uint64_t, K>`.
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/bloom/fast_multiblock64.hpp>
+
+namespace boost{
+namespace bloom{
+
+template<std::size_t K>
+struct fast_multiblock64
+{
+  static constexpr std::size_t k               = K;
+  using value_type                             = _implementation-defined_;
+
+  // might not be present
+  static constexpr std::size_t used_value_size = _implementation-defined_;
+
+  // the rest of the interface is not public
+};
+} // namespace bloom
+} // namespace boost
+-----
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,4"]
+|===
+
+|`K`
+| Number of bits set/checked per operation. Must be greater than zero.
+
+|===
+
+`fast_multiblock64<K>` is statistically equivalent to
+`xref:multiblock[multiblock]<std::uint64_t, K>`, but takes advantage
+of selected SIMD technologies, when available at compile time, to perform faster.
+Currently supported: AVX2.
+The non-SIMD case falls back to regular `multiblock`.
+
+`xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock64<K>>` is
+`8 * K`.
--- a/doc/bloom/reference/filter.adoc
+++ b/doc/bloom/reference/filter.adoc
@@ -0,0 +1,711 @@
+[#filter]
+== Class Template `filter`
+
+:idprefix: filter_
+
+`boost::bloom::filter` -- A data structure that supports element insertion
+and _probabilistic_ lookup: an element is determined either to be in the filter
+with high confidence or not to be in it with absolute certainty. The probability
+that lookup erroneously classifies a non-present element as present is called
+the filter's _false positive rate_ (FPR).
+
+`boost::bloom::filter` maintains an internal array of `m` bits where `m` is the
+filter's _capacity_.  Unlike traditional containers, inserting an
+element `x` does not store a copy of `x` within the filter, but rather results
+in a fixed number of bits in the array being set to one, where the positions
+of the bits are pseudorandomly produced from the hash value of `x`. Lookup
+for `y` simply checks whether all the bits associated to `y` are actually set.
+
+* For a given filter,  the FPR increases as new elements are inserted.
+* For a given number of inserted elements, a filter with higher capacity
+has a lower FPR.
+
+By convention, we say that a filter is _empty_ if its capacity is zero or
+all the bits in the internal array are set to zero.
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/bloom/filter.hpp>
+
+namespace boost{
+namespace bloom{
+
+template<
+  typename T, std::size_t K,
+  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
+  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
+>
+class filter
+{
+public:
+  // types and constants
+  using value_type                         = T;
+  static constexpr std::size_t k           = K;
+  using subfilter                          = Subfilter;
+  static constexpr std::size_t xref:filter_bucket_size[bucket_size] = xref:filter_bucket_size[__see below__];
+  using hasher                             = Hash;
+  using allocator_type                     = Allocator;
+  using size_type                          = std::size_t;
+  using difference_type                    = std::ptrdiff_t;
+  using reference                          = value_type&;
+  using const_reference                    = const value_type&;
+  using pointer                            = value_type*;
+  using const_pointer                      = const value_type*;
+
+  // construct/copy/destroy
+  xref:#filter_default_constructor[filter]();
+  explicit xref:#filter_capacity_constructor[filter](
+    size_type m, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+  xref:#filter_capacity_constructor[filter](
+    size_type n, double fpr, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+  template<typename InputIterator>
+    xref:#filter_iterator_range_constructor[filter](
+      InputIterator first, InputIterator last,
+      size_type m, const hasher& h = hasher(),
+      const allocator_type& al = allocator_type());
+  template<typename InputIterator>
+    xref:#filter_iterator_range_constructor[filter](
+      InputIterator first, InputIterator last,
+      size_type n, double fpr, const hasher& h = hasher(),
+      const allocator_type& al = allocator_type());
+  xref:#filter_copy_constructor[filter](const filter& x);
+  xref:#filter_move_constructor[filter](filter&& x);
+  template<typename InputIterator>
+    xref:#filter_iterator_range_constructor_with_allocator[filter](
+      InputIterator first, InputIterator last,
+      size_type m, const allocator_type& al);
+  template<typename InputIterator>
+    xref:#filter_iterator_range_constructor_with_allocator[filter](
+      InputIterator first, InputIterator last,
+      size_type n, double fpr, const allocator_type& al);
+  explicit xref:#filter_allocator_constructor[filter](const allocator_type& al);
+  xref:#filter_copy_constructor_with_allocator[filter](const filter& x, const allocator_type& al);
+  xref:#filter_move_constructor_with_allocator[filter](filter&& x, const allocator_type& al);
+  xref:#filter_initializer_list_constructor[filter](
+    std::initializer_list<value_type> il,
+    size_type m, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+  xref:#filter_initializer_list_constructor[filter](
+    std::initializer_list<value_type> il,
+    size_type n, double fpr, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+  xref:#filter_capacity_constructor_with_allocator[filter](size_type m, const allocator_type& al);
+  xref:#filter_capacity_constructor_with_allocator[filter](size_type n, double fpr, const allocator_type& al);
+  xref:#filter_initializer_list_constructor_with_allocator[filter](
+    std::initializer_list<value_type> il,
+    size_type m, const allocator_type& al);
+  xref:#filter_initializer_list_constructor_with_allocator[filter](
+    std::initializer_list<value_type> il,
+    size_type n, double fpr, const allocator_type& al);
+  xref:#filter_destructor[~filter]();
+  filter& xref:#filter_copy_assignment[operator+++=+++](const filter& x);
+  filter& xref:#filter_move_assignment[operator+++=+++](filter&& x)
+    noexcept(
+      std::allocator_traits<Allocator>::is_always_equal::value ||
+      std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
+  filter& xref:#filter_initializer_list_assignment[operator+++=+++](std::initializer_list<value_type> il);
+  allocator_type xref:#filter_get_allocator[get_allocator]() const noexcept;
+
+  // capacity
+  size_type xref:#filter_capacity_2[capacity]() const noexcept;
+  static size_type xref:#filter_capacity_estimation[capacity_for](size_type n, double fpr);
+  static double xref:#filter_fpr_estimation[fpr_for](size_type n, size_type m);
+
+  // modifiers
+  template<typename... Args>
+    void xref:#filter_emplace[emplace](Args&&... args);
+  void xref:#filter_insert[insert](const value_type& x);
+  template<typename U>
+    void xref:#filter_insert[insert](const U& x);
+  template<typename InputIterator>
+    void xref:#filter_insert_iterator_range[insert](InputIterator first, InputIterator last);
+  void xref:#filter_insert_initializer_list[insert](std::initializer_list<value_type> il);
+
+  void xref:#filter_swap[swap](filter& x)
+    noexcept(std::allocator_traits<Allocator>::is_always_equal::value ||
+             std::allocator_traits<Allocator>::propagate_on_container_swap::value);
+  void xref:#filter_clear[clear]() noexcept;
+  void xref:#filter_reset[reset](size_type m = 0);
+  void xref:#filter_reset[reset](size_type n, double fpr);
+
+  filter& xref:#filter_combine_with_and[operator&=](const filter& x);
+  filter& xref:#filter_combine_with_or[operator|=](const filter& x);
+
+  // observers
+  hasher xref:#filter_hash_function[hash_function]() const;
+
+  // lookup
+  bool xref:#filter_may_contain[may_contain](const value_type& x) const;
+  template<typename U>
+    bool xref:#filter_may_contain[may_contain](const U& x) const;
+};
+
+} // namespace bloom
+} // namespace boost
+-----
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,4"]
+|===
+
+|`T`
+|The cv-unqualified object type of the elements inserted into the filter.
+
+|`K`
+| Number of times the associated subfilter is invoked per element upon insertion or lookup.
+`K` must be greater than zero.
+
+|`Subfilter`
+| A xref:subfilter[subfilter] type providing the exact algorithm for
+bit setting/checking into the filter's internal array. The subfilter is invoked `K` times
+per operation on `K` pseudorandomly selected portions of the array (_subarrays_) of width
+`xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`.
+
+|`BucketSize`
+| Distance in bytes between the initial positions of consecutive subarrays.
+If `BucketSize` is specified as zero, the actual distance is automatically set to
+`_used-value-size_<Subfilter>` (non-overlapping subarrays).
+Otherwise, `BucketSize` must not be greater than `_used-value-size_<Subfilter>`.
+
+|`Hash`
+|A https://en.cppreference.com/w/cpp/named_req/Hash[Hash^] type over `T`.
+
+|`Allocator`
+|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is `T`.
+
+|===
+
+Allocation and deallocation of the internal array is done through an internal copy of the
+provided allocator. `value_type` construction/destruction (which only happens in
+`xref:filter_emplace[emplace]`) uses
+`std::allocator_traits<Allocator>::construct`/`destroy`.
+
+If `link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]<Hash>::value`
+is `true` and `sizeof(std::size_t) >= 8`, 
+the hash function is used as-is; otherwise, a bit-mixing post-processing stage
+is added to increase the quality of hashing at the expense of extra computational cost.
+
+=== Types and Constants
+
+[[filter_bucket_size]]
+[listing,subs="+macros,+quotes"]
+----
+static constexpr std::size_t bucket_size;
+----
+
+Equal to `BucketSize` if that parameter was specified as distinct from zero.
+Otherwise, equal to `xref:subfilters_used_value_size[_used-value-size_]<subfilter>`.
+
+=== Constructors
+
+==== Default Constructor
+[listing,subs="+macros,+quotes"]
+----
+filter();
+----
+
+Constructs an empty filter using `hasher()` as the hash function and
+`allocator_type()` as the allocator.
+
+[horizontal]
+Preconditions:;; `hasher` and `allocator_type` must be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+Postconditions:;; `capacity() == 0`.
+
+==== Capacity Constructor
+[listing,subs="+macros,+quotes"]
+----
+explicit filter(
+  size_type m, const hasher& h = hasher(),
+  const allocator_type& al = allocator_type());
+filter(
+  size_type n, double fpr, const hasher& h = hasher(),
+  const allocator_type& al = allocator_type());
+----
+
+Constructs an empty filter using copies of `h` and `al` as the hash function and allocator, respectively.
+
+[horizontal]
+Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
+`capacity() == capacity_for(n, fpr)` (second overload).
+
+==== Iterator Range Constructor
+[listing,subs="+macros,+quotes"]
+----
+template<typename InputIterator>
+  filter(
+    InputIterator first, InputIterator last,
+    size_type m, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+template<typename InputIterator>
+  filter(
+    InputIterator first, InputIterator last,
+    size_type n, double fpr, const hasher& h = hasher(),
+    const allocator_type& al = allocator_type());
+----
+
+Constructs a filter using copies of `h` and `al` as the hash function and allocator, respectively,
+and inserts the values from `[first, last)` into it.
+
+[horizontal]
+Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
+`[first, last)` is a valid range.
+Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
+`capacity() == capacity_for(n, fpr)` (second overload). +
+`may_contain(x)` for all values `x` from `[first, last)`.
+
+==== Copy Constructor
+[listing,subs="+macros,+quotes"]
+----
+filter(const filter& x);
+----
+
+Constructs a filter using copies of `x`++'++s internal array, `x.hash_function()`
+and `std::allocator_traits<Allocator>::select_on_container_copy_construction(x.get_allocator())`.
+
+[horizontal]
+Postconditions:;; `*this == x`.
+
+==== Move Constructor
+
+[listing,subs="+macros,+quotes"]
+----
+filter(filter&& x);
+----
+
+Constructs a filter transferring `x`++'++s internal array to `*this` and using
+a hash function and allocator move-constructed from `x`++'++s hash function
+and allocator, respectively.
+
+[horizontal]
+Postconditions:;; `x.capacity() == 0`.
+
+==== Iterator Range Constructor with Allocator
+
+[listing,subs="+macros,+quotes"]
+----
+template<typename InputIterator>
+  filter(
+    InputIterator first, InputIterator last,
+    size_type m, const allocator_type& al);
+template<typename InputIterator>
+  filter(
+    InputIterator first, InputIterator last,
+    size_type n, double fpr, const allocator_type& al);
+----
+
+Equivalent to `xref:#filter_iterator_range_constructor[filter](first, last, m, hasher(), al)` (first overload)
+or `xref:#filter_iterator_range_constructor[filter](first, last, n, fpr, hasher(), al)` (second overload).
+
+==== Allocator Constructor
+
+[listing,subs="+macros,+quotes"]
+----
+explicit filter(const allocator_type& al);
+----
+
+Constructs an empty filter using `hasher()` as the hash function and
+a copy of `al` as the allocator.
+
+[horizontal]
+Preconditions:;; `hasher` must be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+Postconditions:;; `capacity() == 0`.
+
+==== Copy Constructor with Allocator
+
+[listing,subs="+macros,+quotes"]
+----
+filter(const filter& x, const allocator_type& al);
+----
+
+Constructs a filter using copies of `x`++'++s internal array, `x.hash_function()`
+and `al`.
+
+[horizontal]
+Postconditions:;; `*this == x`.
+
+==== Move Constructor with Allocator
+
+[listing,subs="+macros,+quotes"]
+----
+filter(filter&& x, const allocator_type& al);
+----
+
+Constructs a filter transferring `x`++'++s internal array to `*this` if
+`al == x.get_allocator()`, or using a copy of the array otherwise.
+The hash function of the new filter is move-constructed from `x`++'++s
+hash function and the allocator is a copy of `al`.
+
+[horizontal]
+Postconditions:;; `x.capacity() == 0`.
+
+==== Initializer List Constructor
+
+[listing,subs="+macros,+quotes"]
+----
+filter(
+  std::initializer_list<value_type> il,
+  size_type m, const hasher& h = hasher(),
+  const allocator_type& al = allocator_type());
+filter(
+  std::initializer_list<value_type> il,
+  size_type n, double fpr, const hasher& h = hasher(),
+  const allocator_type& al = allocator_type());
+----
+
+Equivalent to `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), m, h, al)` (first overload)
+or `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), n, fpr, h, al)` (second overload).
+
+
+==== Capacity Constructor with Allocator
+
+[listing,subs="+macros,+quotes"]
+----
+filter(size_type m, const allocator_type& al);
+filter(size_type n, double fpr, const allocator_type& al);
+----
+
+Equivalent to `xref:#filter_capacity_constructor[filter](m, hasher(), al)` (first overload)
+or `xref:#filter_capacity_constructor[filter](n, fpr, hasher(), al)` (second overload).
+
+
+==== Initializer List Constructor with Allocator
+
+[listing,subs="+macros,+quotes"]
+----
+filter(
+  std::initializer_list<value_type> il,
+  size_type m, const allocator_type& al);
+filter(
+  std::initializer_list<value_type> il,
+  size_type n, double fpr, const allocator_type& al);
+----
+
+Equivalent to `xref:#filter_initializer_list_constructor[filter](il, m, hasher(), al)` (first overload)
+or `xref:#filter_initializer_list_constructor[filter](il, n, fpr, hasher(), al)` (second overload).
+
+=== Destructor
+
+[listing,subs="+macros,+quotes"]
+----
+~filter();
+----
+
+Deallocates the internal array and destructs the internal hash function and allocator.
+
+=== Assignment
+
+==== Copy Assignment
+
+[listing,subs="+macros,+quotes"]
+----
+filter& operator=(const filter& x);
+----
+
+Let `pocca` be `std::allocator_traits<Allocator>::propagate_on_container_copy_assignment::value`.
+If `pocca`, replaces the internal allocator `al` with a copy of `x.get_allocator()`.
+If `capacity() != x.capacity()` or `pocca && al != x.get_allocator()`, replaces the internal array
+with a new one with capacity  `x.capacity()`.
+Copies the values of `x`++'++s internal array.
+Replaces the internal hash function with a copy of `x.hash_function()`.
+
+[horizontal]
+Preconditions:;; If `pocca`,
+`Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/CopyAssignable[CopyAssignable^]. +
+`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
+Postconditions:;; `*this == x`.
+Returns:;; `*this`.
+
+==== Move Assignment
+
+[listing,subs="+macros,+quotes"]
+----
+filter& operator=(filter&& x)
+  noexcept(
+    std::allocator_traits<Allocator>::is_always_equal::value ||
+    std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
+----
+
+Let `pocma` be `std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value`.
+If `pocma`, replaces the internal allocator with a copy of `x.get_allocator()`.
+If `get_allocator() == x.get_allocator()`, transfers `x`++'++s internal array to `*this`;
+otherwise, replaces the internal array with a new one with capacity `x.capacity()`
+and copies the values of `x`++'++s internal array.
+Replaces the internal hash function with a copy of `x.hash_function()`.
+
+[horizontal]
+Preconditions:;; If `pocma`,
+`Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/CopyAssignable[CopyAssignable^]. +
+`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
+Postconditions:;; `x.capacity() == 0`.
+Returns:;; `*this`.
+
+==== Initializer List Assignment
+
+[listing,subs="+macros,+quotes"]
+----
+filter& operator=(std::initializer_list<value_type> il);
+----
+
+Clears the filter and inserts the values from `il`.
+
+[horizontal]
+Returns:;; `*this`.
+
+=== Capacity
+
+==== Capacity
+
+[listing,subs="+macros,+quotes"]
+----
+size_type capacity() const noexcept;
+----
+
+[horizontal]
+Returns:;; The size in bits of the internal array.
+
+==== Capacity Estimation
+
+[listing,subs="+macros,+quotes"]
+----
+static size_type capacity_for(size_type n, double fpr);
+----
+
+[horizontal]
+Preconditions:;; `fpr` is between 0.0 and 1.0.
+Postconditions:;; `filter(capacity_for(n, fpr)).capacity() == capacity_for(n, fpr)`. +
+`capacity_for(n, 1.0) == 0`.
+Returns:;; An estimation of the capacity required by a `filter` to attain a false positive rate
+equal to `fpr` when `n` distinct elements have been inserted.
+
+==== FPR Estimation
+
+[listing,subs="+macros,+quotes"]
+----
+static double fpr_for(size_type n, size_type m);
+----
+
+[horizontal]
+Postconditions:;; `fpr_for(n, m)` is between 0.0 and 1.0. +
+`fpr_for(n, 0) == 1.0`. +
+`fpr_for(0, m) == 0.0` (if `m != 0`).
+Returns:;; An estimation of the resulting false positive rate when
+`n` distinct elements have been inserted into a `filter`
+with capacity `m`.
+
+=== Modifiers
+
+==== Emplace
+
+[listing,subs="+macros,+quotes"]
+----
+template<typename... Args> void emplace(Args&&... args);
+----
+
+Inserts an element constructed from `std::forward<Args>(args)+++...+++`.
+
+[horizontal]
+Preconditions:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^]
+into `filter` from `std::forward<Args>(args)+++...+++`. +
+`value_type` is https://en.cppreference.com/w/cpp/named_req/Erasable[Erasable^] from `filter`.
+
+==== Insert
+
+[listing,subs="+macros,+quotes"]
+----
+void insert(const value_type& x);
+template<typename U> void insert(const U& x);
+----
+
+If `capacity() != 0`, sets to one `k * subfilter::k` (not necessarily distinct)
+bits of the internal array deterministically selected from the value
+`hash_function()(x)`.
+
+[horizontal]
+Postconditions:;; `may_contain(x)`.
+Notes:;; The second overload only participates in overload resolution if
+`hasher::is_transparent` is a valid member typedef.
+
+==== Insert Iterator Range
+
+[listing,subs="+macros,+quotes"]
+----
+template<typename InputIterator>
+  void insert(InputIterator first, InputIterator last);
+----
+
+Equivalent to `while(first != last) xref:#filter_insert[insert](*first++)`.
+
+[horizontal]
+Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
+`[first, last)` is a valid range.
+
+==== Insert Initializer List
+
+[listing,subs="+macros,+quotes"]
+----
+void insert(std::initializer_list<value_type> il);
+----
+
+Equivalent to `xref:#filter_insert_iterator_range[insert](il.begin(), il.end())`.
+
+==== Swap
+
+[listing,subs="+macros,+quotes"]
+----
+void swap(filter& x)
+  noexcept(std::allocator_traits<Allocator>::is_always_equal::value ||
+           std::allocator_traits<Allocator>::propagate_on_container_swap::value);
+----
+
+Let `pocs` be `std::allocator_traits<Allocator>::propagate_on_container_swap::value`.
+Swaps the internal array and hash function with those of `x`.
+If `pocs`, swaps the internal allocator with that of `x`.
+
+[horizontal]
+Preconditions:;; `pocs || get_allocator() == x.get_allocator()`. +
+If `pocs`, `Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^]. +
+`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
+
+
+==== Clear
+
+[listing,subs="+macros,+quotes"]
+----
+void clear() noexcept;
+----
+
+Sets to zero all the bits in the internal array.
+
+==== Reset
+
+[listing,subs="+macros,+quotes"]
+----
+void reset(size_type m = 0);
+void reset(size_type n, double fpr);
+----
+
+First overload: Replaces the internal array if the resulting capacity calculated from `m` is not
+equal to `capacity()`, and clears the filter. +
+Second overload: Equivalent to `reset(capacity_for(n, fpr))`.
+
+[horizontal]
+Postconditions:;; In general, `capacity() >= m`. +
+If `m == 0` or `m == capacity()` or `m == capacity_for(n, fpr)` for some `n` and `fpr`, then `capacity() == m`.
+
+==== Combine with AND
+
+[listing,subs="+macros,+quotes"]
+----
+filter& operator&=(const filter& x);
+----
+
+If `capacity() != x.capacity()`, throws a `std::invalid_argument` exception;
+otherwise, changes the value of each bit in the internal array with the result of
+doing a logical AND operation of that bit and the corresponding one in `x`.
+
+[horizontal]
+Returns:;; `*this`.
+
+==== Combine with OR
+
+[listing,subs="+macros,+quotes"]
+----
+filter& operator|=(const filter& x);
+----
+
+If `capacity() != x.capacity()`, throws a `std::invalid_argument` exception;
+otherwise, changes the value of each bit in the internal array with the result of
+doing a logical OR operation of that bit and the corresponding one in `x`.
+
+[horizontal]
+Returns:;; `*this`.
+
+=== Observers
+
+==== get_allocator
+
+[listing,subs="+macros,+quotes"]
+----
+allocator_type get_allocator() const noexcept;
+----
+
+[horizontal]
+Returns:;; A copy of the internal allocator.
+
+==== hash_function
+
+[listing,subs="+macros,+quotes"]
+----
+hasher hash_function() const;
+----
+
+[horizontal]
+Returns:;; A copy of the internal hash function.
+
+=== Lookup
+
+==== may_contain
+
+[listing,subs="+macros,+quotes"]
+----
+bool may_contain(const value_type& x) const;
+template<typename U> bool may_contain(const U& x) const;
+----
+
+[horizontal]
+Returns:;; `true` iff all the bits selected by a hypothetical
+`xref:filter_insert[insert](x)` operation are set to one.
+Notes:;; The second overload only participates in overload resolution if
+`hasher::is_transparent` is a valid member typedef.
+
+=== Comparison
+
+==== operator==
+
+[listing,subs="+macros,+quotes"]
+----
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+bool operator==(
+  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+----
+
+[horizontal]
+Returns:;; `true` iff `x.capacity() == y.capacity()` and 
+`x`++'++s and `y`++'++s internal arrays are bitwise identical.
+
+==== operator!=
+
+[listing,subs="+macros,+quotes"]
+----
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+bool operator!=(
+  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+----
+
+[horizontal]
+Returns:;; `!(x xref:filter_operator[==] y)`.
+
+
+=== Swap
+
+[listing,subs="+macros,+quotes"]
+----
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+void swap(filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
+  noexcept(noexcept(x.swap(y)));
+----
+
+Equivalent to `x.xref:filter_swap[swap](y)`.
--- a/doc/bloom/reference/header_block.adoc
+++ b/doc/bloom/reference/header_block.adoc
@@ -0,0 +1,17 @@
+[#header_block]
+== `<boost/bloom/block.hpp>`
+
+:idprefix: header_block_
+
+[listing,subs="+macros,+quotes"]
+-----
+namespace boost{
+namespace bloom{
+
+template<typename Block, std::size_t K>
+struct xref:block[block];
+
+} // namespace bloom
+} // namespace boost
+-----
+
--- a/doc/bloom/reference/header_fast_multiblock32.adoc
+++ b/doc/bloom/reference/header_fast_multiblock32.adoc
@@ -0,0 +1,17 @@
+[#header_fast_multiblock32]
+== `<boost/bloom/fast_multiblock32.hpp>`
+
+:idprefix: header_fast_multiblock32_
+
+[listing,subs="+macros,+quotes"]
+-----
+namespace boost{
+namespace bloom{
+
+template<std::size_t K>
+struct xref:fast_multiblock32[fast_multiblock32];
+
+} // namespace bloom
+} // namespace boost
+-----
+
--- a/doc/bloom/reference/header_fast_multiblock64.adoc
+++ b/doc/bloom/reference/header_fast_multiblock64.adoc
@@ -0,0 +1,17 @@
+[#header_fast_multiblock64]
+== `<boost/bloom/fast_multiblock64.hpp>`
+
+:idprefix: header_fast_multiblock64_
+
+[listing,subs="+macros,+quotes"]
+-----
+namespace boost{
+namespace bloom{
+
+template<std::size_t K>
+struct xref:fast_multiblock64[fast_multiblock64];
+
+} // namespace bloom
+} // namespace boost
+-----
+
--- a/doc/bloom/reference/header_filter.adoc
+++ b/doc/bloom/reference/header_filter.adoc
@@ -0,0 +1,42 @@
+[#header_filter]
+== `<boost/bloom/filter.hpp>`
+
+:idprefix: header_filter_
+
+Defines `xref:filter[boost::bloom::filter]`
+and associated functions.
+
+[listing,subs="+macros,+quotes"]
+-----
+namespace boost{
+namespace bloom{
+
+template<
+  typename T, std::size_t K,
+  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
+  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
+>
+class xref:filter[filter];
+
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+bool xref:filter_operator[operator+++==+++](
+  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+bool xref:filter_operator_2[operator!=](
+  const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
+
+template<
+  typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
+>
+void xref:filter_swap_2[swap](filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
+  noexcept(noexcept(x.swap(y)));
+
+} // namespace bloom
+} // namespace boost
+-----
+
--- a/doc/bloom/reference/header_multiblock.adoc
+++ b/doc/bloom/reference/header_multiblock.adoc
@@ -0,0 +1,17 @@
+[#header_multiblock]
+== `<boost/bloom/multiblock.hpp>`
+
+:idprefix: header_multiblock_
+
+[listing,subs="+macros,+quotes"]
+-----
+namespace boost{
+namespace bloom{
+
+template<typename Block, std::size_t K>
+struct xref:multiblock[multiblock];
+
+} // namespace bloom
+} // namespace boost
+-----
+
--- a/doc/bloom/reference/multiblock.adoc
+++ b/doc/bloom/reference/multiblock.adoc
@@ -0,0 +1,45 @@
+[#multiblock]
+== Class Template `multiblock`
+
+:idprefix: multiblock_
+
+`boost::bloom::multiblock` -- A xref:subfilter[subfilter] over an array of an integral type.
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/bloom/multiblock.hpp>
+
+namespace boost{
+namespace bloom{
+
+template<typename Block, std::size_t K>
+struct multiblock
+{
+  static constexpr std::size_t k = K;
+  using value_type               = Block[k];
+
+  // the rest of the interface is not public
+};
+} // namespace bloom
+} // namespace boost
+-----
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,4"]
+|===
+
+|`Block`
+|An unsigned integral type.
+
+|`K`
+| Number of bits set/checked per operation. Must be greater than zero.
+
+|===
+
+Each of the `K` bits set/checked is located in a different element of the
+`Block[K]` array.
--- a/doc/bloom/reference/subfilters.adoc
+++ b/doc/bloom/reference/subfilters.adoc
@@ -0,0 +1,57 @@
+[#subfilter]
+== Subfilters
+
+:idprefix: subfilters_
+
+A _subfilter_ implements a specific algorithm for bit setting (insertion) and
+bit checking (lookup) for `boost::bloom::filter`. Subfilters operate
+on portions of the filter's internal array called _subarrays_. The
+exact width of these subarrays is statically dependent on the subfilter type.
+
+The full interface of a conforming subfilter is not exposed publicly, hence
+users can't provide their own subfilters and may only use those natively
+provided by the library. What follows is the publicly available interface.
+
+[listing,subs="+macros,+quotes"]
+-----
+Subfilter::k
+-----
+
+[horizontal]
+Result:;; A compile-time `std::size_t` value indicating
+the number of (not necessarily distinct) bits set/checked per operation.
+
+[listing,subs="+macros,+quotes"]
+-----
+typename Subfilter::value_type
+-----
+
+[horizontal]
+Result:;; A cv-unqualified,
+https://en.cppreference.com/w/cpp/named_req/TriviallyCopyable[TriviallyCopyable^]
+type to which the subfilter projects assigned subarrays.
+
+[listing,subs="+macros,+quotes"]
+-----
+Subfilter::used_value_size
+-----
+
+[horizontal]
+Result:;; A compile-time `std::size_t` value indicating
+the size of the effective portion of `Subfilter::value_type` used
+for bit setting/checking (assumed to begin at the lowest address in memory).
+Postconditions:;; Greater than zero and not greater than `sizeof(Subfilter::value_type)`.
+Notes:;; Optional.
+
+=== _used-value-size_
+
+[listing,subs="+macros,+quotes"]
+-----
+template<typename Subfilter>
+constexpr std::size_t _used-value-size_; // exposition only
+-----
+
+`_used-value-size_<Subfilter>` is `Subfilter::used_value_size` if this nested
+constant exists, or `sizeof(Subfilter::value_type)` otherwise.
+The value is the effective size in bytes of the subarrays upon which a
+given subfilter operates.
--- a/doc/bloom/release_notes.adoc
+++ b/doc/bloom/release_notes.adoc
@@ -0,0 +1,9 @@
+[#release_notes]
+= Release Notes
+
+:idprefix: release_notes_
+
+== Boost 1.xx
+
+* Initial release.
+
--- a/doc/bloom/tutorial.adoc
+++ b/doc/bloom/tutorial.adoc
@@ -0,0 +1,204 @@
+[#tutorial]
+= Tutorial
+
+:idprefix: tutorial_
+
+== Filter Definition
+
+A `boost::bloom::filter` can be regarded as a bit array divided into _buckets_ that
+are selected pseudo-randomly (based on a hash function) upon insertion:
+each of the buckets is passed to a _subfilter_ that marks several of its bits according
+to some associated strategy.
+
+[listing,subs="+macros,+quotes"]
+-----
+template<
+  typename T, std::size_t K,
+  typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
+  typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>  
+>
+class filter;
+-----
+
+* `T`: Type of the elements inserted.
+* `K`: Number of buckets marked per insertion.
+* `xref:tutorial_subfilter[Subfilter]`: Type of subfilter used.
+* `xref:tutorial_bucketsize[BucketSize]`: Size in bytes of the buckets.
+* `xref:tutorial_hash[Hash]`: A hash function for `T`.
+* `Allocator`: An allocator for `T`.
+
+=== `Subfilter`
+
+The following subfilters can be selected, offering different compromises
+between performance and _false positive rate_ (FPR).
+See the xref:primer_variations_on_the_classical_filter[Bloom Filter Primer]
+for a general explanation of block and multiblock filters.
+
+`block<Block, K'>`
+
+[.indent]
+Sets `K'` bits in an underlying value of the unsigned integral type `Block`
+(e.g. `unsigned char`, `uint32_t`, `uint64_t`). So,
+a `filter<T, K, block<Block, K'>>` will set `K * K'` bits per element.
+The tradeoff here is that insertion/lookup will be (much) faster than
+with `filter<T, K * K'>` while the FPR will be worse (larger).
+FPR is better the wider `Block` is.
+
+`multiblock<Block, K'>`
+
+[.indent]
+Instead of setting `K'` bits in a `Block` value, this subfilter sets
+one bit on each of the elements of a `Block[K']` subarray. This improves FPR
+but impacts performance with respect to `block<Block, K'>`, among other
+things because cacheline boundaries can be crossed when accessing the subarray.
+
+`fast_multiblock32<K'>`
+
+[.indent]
+Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
+faster SIMD-based algorithms when SSE2, AVX2 or Neon are available.
+
+`fast_multiblock64<K'>`
+
+[.indent]
+Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
+faster SIMD-based algorithm when AVX2 is available.
+
+The default configuration with `block<unsigned char,1>` corresponds to a
+xref:primer[classical Bloom filter] setting `K` bits per element uniformly
+distributed across the array.
+
+=== `BucketSize`
+
+When the default value 0 is used, buckets have the same size as
+the _subarrays_ subfilters operate on (non-overlapping case).
+Otherwise, bucket size is smaller and subarrays spill over adjacent buckets,
+which results in an improved (lower) FPR in exchange for possibly
+worse performance due to unaligned memory access.
+
+=== `Hash`
+
+By default, link:../../../container_hash/index.html[Boost.ContainerHash] is used.
+Consult this library's link:../../../container_hash/doc/html/hash.html#user[dedicated section]
+if you need to extend `boost::hash` for your own types.
+
+When the provided hash function is of sufficient quality, it is used
+as is; otherwise, a bit-mixing post-process is applied to hash values that improves
+their statistical properties so that the resulting FPR approaches its
+theoretical limit. The hash function is determined to be of high quality
+(more precisely, to have the so-called _avalanching_ property) via the
+`link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]`
+trait.
+
+== Capacity
+
+The size of the filter's internal array is specified at construction time:
+
+[listing,subs="+macros,+quotes"]
+-----
+using filter = boost::bloom::filter<std::string, ...>;
+filter f(1'000'000); // array of 1'000'000 **bits**
+std::cout << f.capacity(); // >= 1'000'000
+-----
+
+Note that the default constructor of `boost::bloom::filter` specifies a capacity
+of zero, which in general won't be of much use -- the assigned array
+is null.
+
+Instead of specifying the array's capacity directly, we can let the library
+figure it out based on the number of elements we plan to insert and the
+desired FPR:
+
+[listing,subs="+macros,+quotes"]
+-----
+// we'll insert 100'000 elements and want a FPR ~ 1%
+filter f(100'000, 0.01);
+
+// this is equivalent
+filter f2(filter::capacity_for(100'000, 0.01));
+-----
+
+Once a filter is constructed, its array is fixed (for instance, it won't
+grow dynamically as elements are inserted). The only way to change it is
+by assignment/swapping from a different filter, or using `reset`:
+
+[listing,subs="+macros,+quotes"]
+-----
+f.reset(2'000'000); // changes to 2'000'000 bits **and clears the filter**
+f.reset(100'000, 0.005); // equivalent to reset(filter::capacity_for(100'000, 0.005));
+f.reset(); // null array (capacity == 0)
+-----
+
+== Insertion and Lookup
+
+Insertion is done in much the same way as with a traditional container:
+
+[listing,subs="+macros,+quotes"]
+-----
+f.insert("hello");
+f.emplace(100, 'X'); // ~ insert(std::string(100, 'X'))
+f.insert(data.begin(), data.end());
+-----
+
+Of course, in this context "insertion" does not involve any actual
+storage of elements into the filter, but rather the setting of bits in the
+internal array based on the hash values of those elements.
+Lookup goes as follows:
+
+[listing,subs="+macros,+quotes"]
+-----
+bool b1 = f.may_contain("hello"); // b1 is true since we actually inserted "hello"
+bool b2 = f.may_contain("bye"); // b2 is most likely false
+-----
+
+As its name suggests, `may_contain` can return `true` even if the
+element has not been previously inserted, that is, it may yield false
+positives -- this is the essence of probabilistic data structures.
+`fpr_for` provides an estimation of the false positive rate:
+
+[listing,subs="+macros,+quotes"]
+-----
+// we have inserted 100 elements so far, what's our FPR?
+std::cout<< filter::fpr_for(100, f.capacity());
+-----
+
+Note that in the example we provided the number 100 externally:
+`boost::bloom::filter` does not keep track of the number of elements
+that have been inserted -- in other words, it does not have a `size`
+operation.
+
+Once inserted, there is no way to remove a specific element from the filter.
+We can only clear up the filter entirely:
+
+[listing,subs="+macros,+quotes"]
+-----
+f.clear(); // sets all the bits in the array to zero
+-----
+
+== Filter Combination
+
+`boost::bloom::filter`+++s+++ can be combined by doing the OR logical operation
+of the bits of their arrays:
+
+[listing,subs="+macros,+quotes"]
+-----
+filter f2=...;
+...
+f|=f2; // f and f2 must have exactly the same capacity
+-----
+
+The result is equivalent to a filter "containing" both the elements
+of `f` and `f2`. AND combination, on the other hand, results in a filter
+holding the _intersection_ of the elements:
+
+[listing,subs="+macros,+quotes"]
+-----
+filter f3=...;
+...
+f&=f3; // f and f3 must have exactly the same capacity
+-----
+
+For AND combination, be aware that the resulting FPR will be in general
+worse (higher) than if the filter had been constructed from scratch
+by inserting only the common elements -- don't trust `fpr_for` in this
+case.
--- a/doc/img/block_insertion.png
+++ b/doc/img/block_insertion.png
--- a/doc/img/block_multi_insertion.png
+++ b/doc/img/block_multi_insertion.png
--- a/doc/img/bloom_insertion.png
+++ b/doc/img/bloom_insertion.png
--- a/doc/img/bloom_lookup.png
+++ b/doc/img/bloom_lookup.png
--- a/doc/img/db_speedup.png
+++ b/doc/img/db_speedup.png
--- a/doc/img/fpr_c.png
+++ b/doc/img/fpr_c.png
--- a/doc/img/fpr_n_k.png
+++ b/doc/img/fpr_n_k.png
--- a/doc/img/fpr_n_k_bk.png
+++ b/doc/img/fpr_n_k_bk.png
--- a/doc/img/multiblock_insertion.png
+++ b/doc/img/multiblock_insertion.png
--- a/include/boost/bloom/detail/block_base.hpp
+++ b/include/boost/bloom/detail/block_base.hpp
@@ -32,6 +32,9 @@ struct block_base
  static constexpr std::size_t k=K;
  static constexpr std::size_t hash_width=sizeof(boost::uint64_t)*CHAR_BIT;
  static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
+  static_assert(
+    (block_width&(block_width-1))==0,
+    "Block's size in bits must be a power of two");
  static constexpr std::size_t mask=block_width-1;
  static constexpr std::size_t shift=constexpr_bit_width(mask);
  static constexpr std::size_t rehash_k=(hash_width-shift)/shift;
--- a/include/boost/bloom/detail/core.hpp
+++ b/include/boost/bloom/detail/core.hpp
@@ -60,9 +60,9 @@ namespace detail{
 #endif

 /*  mcg_and_fastrange produces (pos,hash') from hash, where
- *   - x=mulx64(hash,range), mulx64 denotes extended multiplication
- *   - pos=high(x)
- *   - hash'=low(x)
+ *   - m=mulx64(hash,range), mulx64 denotes extended multiplication
+ *   - pos=high(m)
+ *   - hash'=low(m)
 *  pos is uniformly distributed in [0,range) (see
 *  https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
 *  congruential generator of the form hash'<-hash*rng mod 2^64. This MCG
@@ -100,20 +100,20 @@ struct mcg_and_fastrange
  boost::uint64_t rng;
 };

-/* used_block_size<Subfilter>::value is Subfilter::used_value_size if it
+/* used_value_size<Subfilter>::value is Subfilter::used_value_size if it
 * exists, or sizeof(Subfilter::value_type) otherwise. This covers the
 * case where a subfilter only operates on the first bytes of its entire
 * value_type (e.g. fast_multiblock32<K> with K<8).
 */

 template<typename Subfilter,typename=void>
-struct used_block_size
+struct used_value_size
 {
  static constexpr std::size_t value=sizeof(typename Subfilter::value_type);
 };

 template<typename Subfilter>
-struct used_block_size<
+struct used_value_size<
  Subfilter,
  typename std::enable_if<Subfilter::used_value_size!=0>::type
 >
@@ -187,14 +187,14 @@ private:
  static constexpr std::size_t k_total=k*kp;
  using block_type=typename subfilter::value_type;
  static constexpr std::size_t block_size=sizeof(block_type);
-  static constexpr std::size_t used_block_size=
-    detail::used_block_size<subfilter>::value;
+  static constexpr std::size_t used_value_size=
+    detail::used_value_size<subfilter>::value;

 public:
  static constexpr std::size_t bucket_size=
-    BucketSize?BucketSize:used_block_size;
+    BucketSize?BucketSize:used_value_size;
  static_assert(
-    bucket_size<=used_block_size,"BucketSize can't exceed the block size");
+    bucket_size<=used_value_size,"BucketSize can't exceed the block size");

 private:
  static constexpr std::size_t tail_size=sizeof(block_type)-bucket_size;
@@ -356,7 +356,7 @@ public:

  static double fpr_for(std::size_t n,std::size_t m)
  {
-    return n==0?0.0:m==0?1.0:fpr_for_c((double)m/n);
+    return m==0?1.0:n==0?0.0:fpr_for_c((double)m/n);
  }

  BOOST_FORCEINLINE void insert(boost::uint64_t hash)
@@ -410,6 +410,11 @@ public:
    clear_bytes();
  }

+  void reset(std::size_t n,double fpr)
+  {
+    reset(capacity_for(n,fpr));
+  }
+
  filter_core& operator&=(const filter_core& x)
  {
    combine(x,[](unsigned char& a,unsigned char b){a&=b;});
@@ -459,9 +464,9 @@ private:

  static std::size_t requested_range(std::size_t m)
  {
-    if(m>(used_block_size-bucket_size)*CHAR_BIT){
+    if(m>(used_value_size-bucket_size)*CHAR_BIT){
      /* ensures filter_core{f.capacity()}.capacity()==f.capacity() */
-      m-=(used_block_size-bucket_size)*CHAR_BIT;
+      m-=(used_value_size-bucket_size)*CHAR_BIT;
    }
    return
      (std::numeric_limits<std::size_t>::max)()-m>=bucket_size*CHAR_BIT-1?
@@ -530,7 +535,7 @@ private:

  static std::size_t used_array_size(std::size_t rng)noexcept
  {
-    return rng?rng*bucket_size+(used_block_size-bucket_size):0;
+    return rng?rng*bucket_size+(used_value_size-bucket_size):0;
  }

  static std::size_t unadjusted_capacity_for(std::size_t n,double fpr)
@@ -539,7 +544,7 @@ private:
    using double_limits=std::numeric_limits<double>;

    BOOST_ASSERT(fpr>=0.0&&fpr<=1.0);
-    if(n==0)return 0;
+    if(n==0)return fpr==1.0?0:1;

    constexpr double eps=1.0/(double)(size_t_limits::max)();
    constexpr double max_size_t_as_double=
@@ -593,7 +598,7 @@ private:

  static double fpr_for_c(double c)
  {
-    constexpr std::size_t w=(2*used_block_size-bucket_size)*CHAR_BIT;
+    constexpr std::size_t w=(2*used_value_size-bucket_size)*CHAR_BIT;
    const double          lambda=w*k/c;
    const double          loglambda=std::log(lambda);
    double                res=0.0;
--- a/include/boost/bloom/filter.hpp
+++ b/include/boost/bloom/filter.hpp
@@ -146,7 +146,7 @@ public:
    const allocator_type& al=allocator_type()):
    super{m,al},hash_base{empty_init,h}{}

-  explicit filter(
+  filter(
    std::size_t n,double fpr,const hasher& h=hasher(),
    const allocator_type& al=allocator_type()):
    super{n,fpr,al},hash_base{empty_init,h}{}
--- a/test/test_capacity.cpp
+++ b/test/test_capacity.cpp
@@ -85,6 +85,14 @@ void test_capacity()
    BOOST_TEST_EQ(f.capacity(),0);
    BOOST_TEST(f==filter{});
  }
+  {
+    filter f{{fac(),fac()},1000};
+    num_allocations=0;
+    f.reset(0,1.0);
+    BOOST_TEST_EQ(num_allocations,0);
+    BOOST_TEST_EQ(f.capacity(),0);
+    BOOST_TEST(f==filter{});
+  }
  {
    filter      f{{fac(),fac()},1000};
    std::size_t c=f.capacity();
@@ -94,6 +102,14 @@ void test_capacity()
    BOOST_TEST_GE(f.capacity(),c+1);
    BOOST_TEST(f==filter{f.capacity()});
  }
+  {
+    filter      f;
+    std::size_t c=filter::capacity_for(100,0.1);
+    num_allocations=0;
+    f.reset(100,0.1);
+    BOOST_TEST_EQ(num_allocations,1);
+    BOOST_TEST_EQ(f.capacity(),c);
+  }
  {
    filter      f1{{fac(),fac()},1000},f2;
    std::size_t c=f1.capacity();
--- a/test/test_fpr.cpp
+++ b/test/test_fpr.cpp
@@ -64,7 +64,9 @@ void test_fpr()
    boost::hash<std::string>
  >;

-  BOOST_TEST_EQ(filter(0,0.01).capacity(),0);
+  BOOST_TEST_GT(filter(0,0.0).capacity(),0);
+  BOOST_TEST_GT(filter(0,0.5).capacity(),0);
+  BOOST_TEST_EQ(filter(0,1.0).capacity(),0);
  BOOST_TEST_THROWS((void)filter(1,0.0),std::bad_alloc);
  BOOST_TEST_EQ(filter(100,1.0).capacity(),0);

@@ -82,7 +84,7 @@ void test_fpr()
  }

  BOOST_TEST_EQ(filter::fpr_for(0,1),0.0);
-  BOOST_TEST_EQ(filter::fpr_for(0,0),0.0);
+  BOOST_TEST_EQ(filter::fpr_for(0,0),1.0);
  BOOST_TEST_EQ(filter::fpr_for(1,0),1.0);

  {