added documentation
* removed unneeded explicit * fixed boundary results for capacity_for and fpr_for * renamed used_block_size to used_value_size * added reset(n,n) * added initial documentation draft * static asserted assumption on Block size * synced up naming in comment with that of docs * added implementation notes * editorial * expanded tables * removed unneeded explicit * fixed boundary results for capacity_for and fpr_for * renamed used_block_size to used_value_size * added reset(n,n) * added initial documentation draft * static asserted assumption on Block size * synced up naming in comment with that of docs * added implementation notes * editorial * added benchmarks * editorial * added configuration section * editorial * s/multiinsertion/multi-insertion * added section on use cases * editorial
23
doc/Jamfile.v2
Normal file
@@ -0,0 +1,23 @@
|
||||
# Copyright 2025 Joaquín M López Muñoz.
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# (See accompanying file LICENSE_1_0.txt or copy at
|
||||
# http://www.boost.org/LICENSE_1_0.txt)
|
||||
#
|
||||
# See http://www.boost.org/libs/bloom for library home page.
|
||||
|
||||
import asciidoctor ;
|
||||
|
||||
html bloom.html : bloom.adoc ;
|
||||
|
||||
install html_ : bloom.html : <location>html ;
|
||||
|
||||
pdf bloom.pdf : bloom.adoc ;
|
||||
explicit bloom.pdf ;
|
||||
|
||||
install pdf_ : bloom.pdf : <location>bloom ;
|
||||
explicit pdf_ ;
|
||||
|
||||
alias boostdoc ;
|
||||
explicit boostdoc ;
|
||||
alias boostrelease : html_ ;
|
||||
explicit boostrelease ;
|
||||
41
doc/bloom.adoc
Normal file
@@ -0,0 +1,41 @@
|
||||
= Boost.Bloom
|
||||
:toc: left
|
||||
:toclevels: 3
|
||||
:idprefix:
|
||||
:docinfo: private-footer
|
||||
:source-highlighter: rouge
|
||||
:source-language: c++
|
||||
:nofooter:
|
||||
:sectlinks:
|
||||
:leveloffset: +1
|
||||
:imagesdir: ../img
|
||||
:stem: latexmath
|
||||
:small: pass:[<font style="font-size:90%">]
|
||||
:small-end: pass:[</font>]
|
||||
|
||||
++++
|
||||
<style>
|
||||
.imageblock > .title {
|
||||
text-align: inherit;
|
||||
}
|
||||
|
||||
.indent {
|
||||
padding-left: 2rem;
|
||||
}
|
||||
|
||||
.bordered_table th, .bordered_table td {
|
||||
border: 1px solid lightgray;
|
||||
}
|
||||
</style>
|
||||
++++
|
||||
|
||||
include::bloom/intro.adoc[]
|
||||
include::bloom/primer.adoc[]
|
||||
include::bloom/tutorial.adoc[]
|
||||
include::bloom/configuration.adoc[]
|
||||
include::bloom/benchmarks.adoc[]
|
||||
include::bloom/reference.adoc[]
|
||||
include::bloom/fpr_estimation.adoc[]
|
||||
include::bloom/implementation_notes.adoc[]
|
||||
include::bloom/release_notes.adoc[]
|
||||
include::bloom/copyright.adoc[]
|
||||
1202
doc/bloom/benchmarks.adoc
Normal file
99
doc/bloom/configuration.adoc
Normal file
@@ -0,0 +1,99 @@
|
||||
[#configuration]
|
||||
= Choosing a Filter Configuration
|
||||
|
||||
:idprefix: configuration_
|
||||
|
||||
Boost.Bloom offers a plethora of compile-time and run-time configuration options,
|
||||
so it may be difficult to make a choice.
|
||||
If you're aiming for a given FPR or have a particular capacity in mind and
|
||||
you'd like to choose the most appropriate filter type, the following chart
|
||||
may come in handy.
|
||||
|
||||
image::fpr_c.png[align=center, title="FPR vs. _c_ for different filter types."]
|
||||
|
||||
The chart plots FPR vs. _c_ (capacity / number of elements inserted) for several
|
||||
`boost::bloom::filter`+++s+++ where `K` has been set to its optimum value (minimum FPR)
|
||||
as shown in the table below.
|
||||
|
||||
+++
|
||||
<table class="bordered_table" style="text-align: center;">
|
||||
<tr>
|
||||
<th rowspan="2"></th>
|
||||
<th colspan="21"><i>c</i> = capacity / number of elements inserted</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>4</th> <th>5</th> <th>6</th> <th>7</th> <th>8</th> <th>9</th> <th>10</th> <th>11</th> <th>12</th> <th>13</th>
|
||||
<th>14</th> <th>15</th> <th>16</th> <th>17</th> <th>18</th> <th>19</th> <th>20</th> <th>21</th> <th>22</th> <th>23</th> <th>24</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td>
|
||||
<td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint32_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint64_t,K>></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>5</td> <td>6</td>
|
||||
<td>6</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,block<uint64_t,K>,1></code></td> <td>2</td> <td>3</td> <td>4</td> <td>4</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td>
|
||||
<td>7</td> <td>7</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td> <td>9</td> <td>9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint32_t,K>></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>8</td> <td>8</td>
|
||||
<td>9</td> <td>9</td> <td>9</td> <td>10</td> <td>13</td> <td>13</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>16</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint32_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td>9</td> <td>9</td> <td>10</td> <td>10</td> <td>12</td> <td>12</td> <td>14</td> <td>14</td> <td>14</td> <td>14</td> <td>15</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint64_t,K>></code></td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>6</td> <td>7</td> <td>8</td> <td>8</td>
|
||||
<td>10</td> <td>10</td> <td>12</td> <td>13</td> <td>14</td> <td>15</td> <td>15</td> <td>15</td> <td>15</td> <td>16</td> <td>17</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<1,multiblock<uint64_t,K>,1></code></td> <td>3</td> <td>3</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>7</td> <td>9</td> <td>10</td>
|
||||
<td>10</td> <td>11</td> <td>11</td> <td>12</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>15</td> <td>16</td> <td>16</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: left;"><code>filter<K></code></td> <td>3</td> <td>4</td> <td>4</td> <td>5</td> <td>5</td> <td>6</td> <td>6</td> <td>8</td> <td>8</td> <td>9</td>
|
||||
<td>10</td> <td>11</td> <td>12</td> <td>13</td> <td>13</td> <td>13</td> <td>14</td> <td>16</td> <td>16</td> <td>16</td> <td>17</td>
|
||||
</tr>
|
||||
</table>
|
||||
+++
|
||||
|
||||
Let's see how this can be used by way of an example. Suppose we plan to insert 10M elements
|
||||
and want to keep the FPR at 10^-4^. The chart gives us five possibilities:
|
||||
|
||||
* `filter<K>` -> _c_ ≅ 19 bits per element
|
||||
* `filter<1, multiblock<uint64_t, K>, 1>` -> _c_ ≅ 20 bits per element
|
||||
* `filter<1, multiblock<uint64_t, K>>` -> _c_ ≅ 21 bits per element
|
||||
* `filter<1, multiblock<uint32_t, K>, 1>` -> _c_ ≅ 21.5 bits per element
|
||||
* `filter<1, multiblock<uint32_t, K>>` -> _c_ ≅ 23 bits per element
|
||||
|
||||
These options have different tradeoffs in terms of space used and performance. If
|
||||
we choose `filter<1, multiblock<uint32_t, K>, 1>` as a compromise (or better yet,
|
||||
`filter<1, fast_multiblock32<K>, 1>`), the only remaining step is to consult the
|
||||
value of `K` in the table for _c_ = 21 or 22, and we get our final configuration:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
using my_filter=filter<std::string, 1, fast_multiblock32<**14**>, 1>;
|
||||
-----
|
||||
|
||||
The resulting filter can be constructed in any of the following ways:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// 1) calculate the capacity from the value of c we got from the chart
|
||||
my_filter pass:[f((]std::size_t)(10'000'000 * 21.5));
|
||||
|
||||
// 2) let the library calculate the capacity from n and target fpr
|
||||
// expect some deviation from the capacity in 1)
|
||||
my_filter f(10'000'000, 1E-4);
|
||||
|
||||
// 3) equivalent to 2)
|
||||
my_filter f(my_filter::capacity_for(10'000'000, 1E-4));
|
||||
-----
|
||||
10
doc/bloom/copyright.adoc
Normal file
@@ -0,0 +1,10 @@
|
||||
[#copyright]
|
||||
= Copyright and License
|
||||
|
||||
:idprefix: copyright_
|
||||
|
||||
Of this documentation:
|
||||
|
||||
* Copyright © 2025 Joaquín M López Muñoz
|
||||
|
||||
Distributed under the http://www.boost.org/LICENSE_1_0.txt[Boost Software License, Version 1.0^].
|
||||
74
doc/bloom/fpr_estimation.adoc
Normal file
@@ -0,0 +1,74 @@
|
||||
[#fpr_estimation]
|
||||
= Appendix A: FPR Estimation
|
||||
|
||||
:idprefix: fpr_estimation_
|
||||
|
||||
For a classical Bloom filter, the theoretical false positive rate, under some simplifying assumptions,
|
||||
is given by
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}(n,m,k)=\left(1 - \left(1 - \displaystyle\frac{1}{m}\right)^{kn}\right)^k \approx \left(1 - e^{-kn/m}\right)^k]{small-end} for large {small}stem:[m]{small-end},
|
||||
|
||||
where {small}stem:[n]{small-end} is the number of elements inserted in the filter, {small}stem:[m]{small-end} its capacity in bits and {small}stem:[k]{small-end} the
|
||||
number of bits set per insertion (see a https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives[derivation^]
|
||||
of this formula). For a given inverse load factor {small}stem:[c=m/n]{small-end}, the optimum {small}stem:[k]{small-end} is
|
||||
the integer closest to:
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[k_{\text{opt}}=c\cdot\ln2,]{small-end}
|
||||
|
||||
yielding a minimum attainable FPR of {small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{c}]{small-end}.
|
||||
|
||||
In the case of a filter of the form `boost::bloom::filter<T, K, block<Block, K'>>`, we can extend
|
||||
the approach from https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f376ff09a64b388bfcde2f5353e9ddb44033aac8[Putze et al.^]
|
||||
to derive the (approximate but very precise) formula:
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbk/m) \cdot \text{FPR}(i,b,k')\right)^{k},]{small-end}
|
||||
|
||||
where
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{Pois}(i,\lambda)=\displaystyle\frac{\lambda^i e^{-\lambda}}{i!}]{small-end}
|
||||
|
||||
is the probability mass function of a https://en.wikipedia.org/wiki/Poisson_distribution[Poisson distribution^]
|
||||
with mean {small}stem:[\lambda]{small-end}, and {small}stem:[b]{small-end} is the size of `Block` in bits. If we're using `multiblock<Block,K'>`, we have
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}(i,nbkk'/m) \cdot \text{FPR}(i,b,1)^{k'}\right)^{k}.]{small-end}
|
||||
|
||||
As we have commented xref:primer_multiblock_filters[before], in general
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}_\text{block}(n,m,b,k,k') \geq \text{FPR}_\text{multiblock}(n,m,b,k,k') \geq \text{FPR}(n,m,kk'),]{small-end}
|
||||
|
||||
that is, block and multiblock filters have worse FPR than the classical filter for the same number of bits
|
||||
set per insertion, but they will be faster. We have the particular case
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,k,1)=\text{FPR}_{\text{multiblock}}(n,m,b,k,1)=\text{FPR}(n,m,k),]{small-end}
|
||||
|
||||
which follows simply from the observation that using `{block|multiblock}<Block, 1>` behaves exactly as
|
||||
a classical Bloom filter.
|
||||
|
||||
We don't know of any closed, simple formula for the FPR of block and multiblock filters when
|
||||
`BucketSize` is not its "natural" size `xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`,
|
||||
that is, when subfilter subarrays overlap.
|
||||
We can use the following approximations ({small}stem:[s]{small-end} = `BucketSize` in bits):
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2b-s)k}{m}\right) \cdot \text{FPR}(i,2b-s,k')\right)^{k},]{small-end} +
|
||||
{small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')=\left(\displaystyle\sum_{i=0}^{\infty} \text{Pois}\left(i,\frac{n(2bk'-s)k}{m}\right) \cdot \text{FPR}\left(i,\frac{2bk'-s}{k'},1\right)^{k'}\right)^{k},]{small-end}
|
||||
|
||||
where the replacement of {small}stem:[b]{small-end} with {small}stem:[2b-s]{small-end}
|
||||
(or {small}stem:[bk']{small-end} with {small}stem:[2bk'-s]{small-end} for multiblock filters) accounts
|
||||
for the fact that the window of hashing positions affecting a particular bit spreads due to
|
||||
overlapping. Note that the formulas reduce to the non-overlapping case when {small}stem:[s]{small-end} takes its
|
||||
default value ({small}stem:[b]{small-end} for block, {small}stem:[bk']{small-end} for multiblock). These approximations are acceptable for
|
||||
low values of {small}stem:[k']{small-end} but tend to underestimate the actual FPR as {small}stem:[k']{small-end} grows.
|
||||
In general, the use of overlapping improves (decreases) FPR by a factor ranging from
|
||||
0.6 to 0.9 for typical filter configurations.
|
||||
|
||||
{small}stem:[\text{FPR}_{\text{block}}(n,m,b,s,k,k')]{small-end} and {small}stem:[\text{FPR}_\text{multiblock}(n,m,b,s,k,k')]{small-end}
|
||||
are the formulas used by the implementation of
|
||||
`xref:filter_fpr_estimation[boost::bloom::filter::fpr_for]`.
|
||||
130
doc/bloom/implementation_notes.adoc
Normal file
@@ -0,0 +1,130 @@
|
||||
[#implementation_notes]
|
||||
= Appendix B: Implementation Notes
|
||||
|
||||
:idprefix: implementation_notes_
|
||||
|
||||
== Hash Mixing
|
||||
|
||||
This is the bit-mixing post-process we use to improve the statistical properties
|
||||
of the hash function when it doesn't have the avalanching property:
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[m\leftarrow\text{mulx}(h,C)]{small-end}, +
|
||||
{small}stem:[h'\leftarrow\text{high}(m)\text{ xor }\text{low}(m)]{small-end},
|
||||
|
||||
where {small}stem:[\text{mulx}]{small-end} denotes 128-bit multiplication of two 64-bit factors,
|
||||
{small}stem:[\text{high}(m)]{small-end} and {small}stem:[\text{low}(m)]{small-end}
|
||||
are the high and low 64-bit words of {small}stem:[m]{small-end}, respectively,
|
||||
{small}stem:[C=\lfloor 2^{64}/\varphi \rfloor]{small-end} and
|
||||
{small}stem:[\varphi]{small-end} is the https://en.wikipedia.org/wiki/Golden_ratio[golden ratio^].
|
||||
|
||||
== 32-bit mode
|
||||
|
||||
Internally, we always use 64-bit hash values even if in 32-bit mode, where
|
||||
the user-provided hash function produces 32-bit outputs. To expand
|
||||
a 32-bit hash value to 64 bits, we use the same mixing procedure
|
||||
described
|
||||
xref:implementation_notes_hash_mixing[above].
|
||||
|
||||
== Dispensing with Multiple Hash Functions
|
||||
|
||||
Direct implementations of a Bloom filter with {small}stem:[k]{small-end}
|
||||
bits per operation require {small}stem:[k]{small-end} different and independent
|
||||
hash functions {small}stem:[h_i(x)]{small-end}, which incurs an important
|
||||
performance penalty, particularly if the objects are expensive to hash
|
||||
(e.g. strings). https://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf[Kirsch and Mitzenmacher^]
|
||||
show how to relax this requirement down to two different hash functions
|
||||
{small}stem:[h_1(x)]{small-end} and {small}stem:[h_2(x)]{small-end} linearly
|
||||
combined as
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[g_i(x)=h_1(x)+ih_2(x).]{small-end}
|
||||
|
||||
Without formal justification, we have relaxed this even further to just one
|
||||
initial hash value {small}stem:[h_0=h_0(x)]{small-end}, where new values
|
||||
{small}stem:[h_i]{small-end} are computed from {small}stem:[h_{i-1}]{small-end}
|
||||
by means of very cheap mixing schemes. In what follows
|
||||
{small}stem:[k]{small-end}, {small}stem:[k']{small-end} are the homonym values
|
||||
in a filter of the form `boost::bloom::filter<T, K, {block|multiblock}<Block, K'>>`,
|
||||
{small}stem:[b]{small-end} is `sizeof(Block) * CHAR_BIT`,
|
||||
and {small}stem:[r]{small-end} is the number of buckets in the filter.
|
||||
|
||||
=== Bucket Location
|
||||
|
||||
To produce a location (i.e. a number {small}stem:[p]{small-end} in {small}stem:[[0,r)]{small-end}) from
|
||||
{small}stem:[h_{i-1}]{small-end}, instead of the straightforward but costly
|
||||
procedure {small}stem:[p\leftarrow h_{i-1}\bmod r]{small-end} we resort to
|
||||
Lemire's https://arxiv.org/pdf/1805.10941[fastrange technique^]. Moreover,
|
||||
we combine this calculation with the production of {small}stem:[h_{i}]{small-end}
|
||||
from {small}stem:[h_{i-1}]{small-end} as follows:
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[m\leftarrow\text{mulx}(h_{i-1},r),]{small-end} +
|
||||
{small}stem:[p\leftarrow\lfloor m/2^{64} \rfloor=\text{high}(m),]{small-end} +
|
||||
{small}stem:[h_i\leftarrow m \bmod 2^{64}=\text{low}(m).]{small-end}
|
||||
|
||||
The transformation {small}stem:[h_{i-1} \rightarrow h_i]{small-end} is
|
||||
a simple https://en.wikipedia.org/wiki/Linear_congruential_generator[multiplicative congruential generator^]
|
||||
over {small}stem:[2^{64}]{small-end}. For this MCG to produce long
|
||||
cycles, {small}stem:[h_0]{small-end} must be odd and the multiplicative constant
|
||||
{small}stem:[r]{small-end} must be {small}stem:[\equiv \pm 3 \text{ (mod 8)}]{small-end}:
|
||||
to meet these requirements, the implementation adjusts {small}stem:[h_0]{small-end}
|
||||
to {small}stem:[h_0']{small-end} and {small}stem:[r]{small-end}
|
||||
to {small}stem:[r']{small-end}. This renders the least significant bit
|
||||
of {small}stem:[h_i]{small-end} unsuitable for pseudorandomization
|
||||
(it is always one).
|
||||
|
||||
=== Bit selection
|
||||
|
||||
Inside a subfilter, we must produce {small}stem:[k']{small-end}
|
||||
values from {small}stem:[h_i]{small-end} in the range
|
||||
{small}stem:[[0,b)]{small-end} (the positions of the {small}stem:[k']{small-end}
|
||||
bits). We do this by successively taking {small}stem:[\log_2b]{small-end} bits
|
||||
from {small}stem:[h_i]{small-end} without utilizing the portion containing
|
||||
its least significant bit (which is always one as we have discussed).
|
||||
If we run out of bits (which happens when
|
||||
{small}stem:[k'> 63/\log_2b]{small-end}), we produce a new hash value
|
||||
{small}stem:[h_{i+1}]{small-end} from {small}stem:[h_{i}]{small-end}
|
||||
using the mixing procedure
|
||||
xref:implementation_notes_hash_mixing[already described].
|
||||
|
||||
== SIMD algorithms
|
||||
|
||||
=== `fast_multiblock32`
|
||||
|
||||
When using AVX2, we select up to 8 bits at a time by creating
|
||||
a `+++__+++m256i` of 32-bit values {small}stem:[(x_0,x_1,...,x_7)]{small-end}
|
||||
where each {small}stem:[x_i]{small-end} is constructed from
|
||||
a different 5-bit portion of the hash value, and calculating from this
|
||||
the `+++__+++m256i` {small}stem:[(2^{x_0},2^{x_1},...,2^{x_7})]{small-end}
|
||||
with https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/mm256-sllv-epi32-64.html[`+++_+++mm256_sllv_epi32`^].
|
||||
If more bits are needed, we generate a new hash value as
|
||||
xref:implementation_notes_hash_mixing[described before] and repeat.
|
||||
|
||||
For little-endian Neon, the algorithm is similar but the computations
|
||||
are carried out with two `uint32x4_t`+++s+++ in parallel as Neon does not have
|
||||
256-bit registers.
|
||||
|
||||
In the case of SSE2, we don't have the 128-bit equivalent of
|
||||
`+++_+++mm256_sllv_epi32`, so we use the following, mildly interesting
|
||||
technique: a `+++__+++m128i` of the form
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[((x_0+127)\cdot 2^{23},(x_1+127)\cdot 2^{23},(x_2+127)\cdot 2^{23},(x_3+127)\cdot 2^{23}),]{small-end}
|
||||
|
||||
where each {small}stem:[x_i]{small-end} is in {small}stem:[[0,32)]{small-end},
|
||||
can be `reinterpret_cast`+++ed+++ to (i.e., has the same binary representation as)
|
||||
the `+++__+++m128` (register of `float`+++s+++)
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[(2^{x_0},2^{x_1},2^{x_2},2^{x_3}),]{small-end}
|
||||
|
||||
from which our desired `+++__+++m128i` of shifted 1s can be obtained
|
||||
with https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/conversion-intrinsics-003.html#GUID-B1CFE576-21E9-4E70-BE5E-B9B18D598C12[`+++_+++mm_cvttps_epi32`^].
|
||||
|
||||
=== `fast_multiblock64`
|
||||
|
||||
We only provide a SIMD implementation for AVX2 that relies on two
|
||||
parallel `+++__+++m256i`+++s+++ for the generation of up
|
||||
to 8 64-bit values with shifted 1s. For Neon and SSE2, emulation
|
||||
through 4 128-bit registers proved slower than non-SIMD `multiblock<uint64_t, K>`.
|
||||
49
doc/bloom/intro.adoc
Normal file
@@ -0,0 +1,49 @@
|
||||
[#intro]
|
||||
= Introduction
|
||||
|
||||
:idprefix: intro_
|
||||
|
||||
Boost.Bloom provides the class template `xref:tutorial[boost::bloom::filter]`
|
||||
that can be configured to implement a classical Bloom filter as well as
|
||||
variations discussed in the literature such as block filters, multiblock filters,
|
||||
and more.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
#include <boost/bloom/filter.hpp>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
int main()
|
||||
{
|
||||
// Bloom filter of strings with 5 bits set per insertion
|
||||
using filter = boost::bloom::filter<std::string, 5>;
|
||||
|
||||
// create filter with a capacity of 1'000'000 **bits**
|
||||
filter f(1'000'000);
|
||||
|
||||
// insert elements (they can't be erased, Bloom filters are insert-only)
|
||||
f.insert("hello");
|
||||
f.insert("Boost");
|
||||
//...
|
||||
|
||||
// elements inserted are always correctly checked as such
|
||||
assert(f.may_contain("hello") == true);
|
||||
|
||||
// elements not inserted may incorrectly be identified as such with a
|
||||
// false positive rate (FPR) which is a function of the array capacity,
|
||||
// the number of bits set per element and generally how the boost::bloom::filter
|
||||
// was specified
|
||||
if(f.may_contain("bye")) { // likely false
|
||||
//...
|
||||
}
|
||||
}
|
||||
-----
|
||||
|
||||
The different filter variations supported are specified at compile time
|
||||
as part of the `boost::bloom::filter` instantiation definition.
|
||||
Boost.Bloom has been implemented with a focus on performance;
|
||||
SIMD technologies such as AVX2, Neon and SSE2 can be leveraged to speed up
|
||||
operations.
|
||||
|
||||
Boost.Bloom is a header-only library. C++11 or later required.
|
||||
118
doc/bloom/primer.adoc
Normal file
@@ -0,0 +1,118 @@
|
||||
[#primer]
|
||||
= Bloom Filter Primer
|
||||
|
||||
:idprefix: primer_
|
||||
|
||||
A Bloom filter is a probabilistic data structure where inserted elements can be looked up
|
||||
with 100% accuracy, whereas looking up a non-inserted element may fail with
|
||||
some probability called the filter's _false positive rate_ or FPR. The tradeoff here is
|
||||
that Bloom filters occupy much less space than traditional non-probabilistic containers
|
||||
(typically, around 8-20 bits per element) for an acceptably low FPR. The greater
|
||||
the filter's _capacity_ (its size in bits), the lower the resulting FPR.
|
||||
|
||||
One prime application of Bloom filters and similar data structures is for the prevention
|
||||
of expensive disk/network accesses when these would fail to retrieve a given piece of
|
||||
information.
|
||||
For instance, suppose we are developing a frontend for a database with access time
|
||||
10 ms and we know 50% of the requests will not succeed (the record does not exist).
|
||||
Inserting a Bloom filter with a lookup time of 200 ns and an FPR of 0.5% will reduce the
|
||||
average response time of the system from 10 ms to
|
||||
|
||||
[.text-center]
|
||||
(10 + 0.0002) × 50.25% + 0.0002 × 49.75% ≅ 5.03 ms,
|
||||
|
||||
that is, we get a ×1.99 overall speedup. If the database holds 1 billion records,
|
||||
an in-memory filter with say 8 bits per element will occupy 0.93 GB,
|
||||
which is perfectly realizable.
|
||||
|
||||
image::db_speedup.png[align=center, title="Improving DB negative access time with a Bloom filter."]
|
||||
|
||||
In general, Bloom filters are useful to prevent/mitigate queries against large data sets
|
||||
when exact retrieval is costly and/or can't be made in main memory.
|
||||
Applications have been described in the areas of web caching,
|
||||
dictionary compression, network routing and genomics, among others.
|
||||
https://www.eecs.harvard.edu/~michaelm/postscripts/im2005b.pdf[Broder and Mitzenmacher^]
|
||||
provide a rather extensive review of use cases with a focus on networking.
|
||||
|
||||
== Implementation
|
||||
|
||||
The implementation of a Bloom filter consists of an array of _m_ bits, initially set to zero.
|
||||
Inserting an element _x_ reduces to selecting _k_ positions pseudorandomly (with the help
|
||||
of _k_ independent hash functions) and setting them to one.
|
||||
|
||||
image::bloom_insertion.png[align=center, title="Insertion in a classical Bloom filter, _k_ = 6."]
|
||||
|
||||
To check if an element _y_ is in the filter, we follow the same procedure and see if
|
||||
the selected bits are all set to one. In the example figure there are two unset bits, which
|
||||
definitely indicates _y_ was not inserted in the filter.
|
||||
|
||||
image::bloom_lookup.png[align=center, title="Lookup in a classical Bloom filter."]
|
||||
|
||||
A false positive occurs when the bits checked happen to be all set to one due to
|
||||
other, unrelated insertions. The probability of having a false positive increases as we
|
||||
add more elements to the filter, whereas for a given number _n_ of inserted elements, a filter
|
||||
with greater capacity (larger bit array) will have a lower FPR.
|
||||
The number _k_ of bits set per operation also affects the FPR, albeit in a more complicated way:
|
||||
when the array is sparsely populated, a higher value of _k_ improves (decreases) the FPR,
|
||||
as there are more chances that we hit a non-set bit; however, if _k_ is very high
|
||||
the array will have more and more bits set to one as new elements are inserted, which
|
||||
eventually will reach a point where we lose out to a filter with a lower _k_ and
|
||||
thus a smaller proportion of set bits.
|
||||
|
||||
image::fpr_n_k.png[align=center, title="FPR vs. number of inserted elements for two filters with _m_ = 10^5^ bits."]
|
||||
|
||||
For given values of _n_ and _m_, the optimum _k_ is the integer closest to
|
||||
|
||||
[.text-center]
|
||||
{small}stem:[k_{\text{opt}}=\displaystyle\frac{m\cdot\ln2}{n}]{small-end}
|
||||
|
||||
for a minimum FPR of
|
||||
{small}stem:[1/2^{k_{\text{opt}}} \approx 0.6185^{m/n}]{small-end}. See the appendix
|
||||
on xref:fpr_estimation[FPR estimation] for more details.
|
||||
|
||||
== Variations on the Classical Filter
|
||||
|
||||
=== Block Filters
|
||||
|
||||
An operation on a Bloom filter involves accessing _k_ different positions in memory,
|
||||
which, for large arrays, results in _k_ CPU cache misses and affects the
|
||||
operation's performance. A variation on the classical approach called a
|
||||
_block filter_ seeks to minimize cache misses by concentrating all bit
|
||||
setting/checking in a small block of _b_ bits pseudorandomly selected from the
|
||||
entire array. If the block is small enough, it will fit in a CPU cacheline,
|
||||
thus drastically reducing the number of cache misses.
|
||||
|
||||
image::block_insertion.png[align=center, title="Block filter."]
|
||||
|
||||
The downside is that the resulting FPR is worse than that of a classical filter for
|
||||
the same values of _n_, _m_ and _k_. Intuitively, block filters reduce the
|
||||
uniformity of the distribution of bits in the array, which ultimately hurts their
|
||||
probabilistic performance.
|
||||
|
||||
image::fpr_n_k_bk.png[align=center, title="FPR (logarithmic scale) vs. number of inserted elements for a classical and a block filter, _m_ = 10^5^ bits."]
|
||||
|
||||
A further variation on this idea is to have operations select _k_ blocks
|
||||
with _k'_ bits set on each. This, again, will have a worse FPR than a classical
|
||||
filter with _k·k'_ bits per operation, but improves on a plain
|
||||
_k·k'_ block filter.
|
||||
|
||||
image::block_multi_insertion.png[align=center, title="Block filter with multi-insertion."]
|
||||
|
||||
=== Multiblock Filters
|
||||
|
||||
_Multiblock filters_ take block filters' approach further by having
|
||||
bit setting/checking done on a sequence of consecutive blocks of size _b_,
|
||||
so that each block takes exactly one bit. This still maintains a good cache
|
||||
locality but improves FPR with respect to block filters because bits set to one
|
||||
are more spread out across the array.
|
||||
|
||||
image::multiblock_insertion.png[align=center, title="Multiblock filter."]
|
||||
|
||||
Multiblock filters can also be combined with multi-insertion. In general,
|
||||
for the same number of bits per operation and equal values of _n_ and _m_,
|
||||
a classical Bloom filter will have the better (lower) FPR, followed by
|
||||
multiblock filters and then block filters. Execution speed will roughly go
|
||||
in the reverse order. When considering block/multiblock filters with
|
||||
multi-insertion, the number of available configurations grows quickly and
|
||||
you will need to do some experimenting to locate your preferred point in the
|
||||
(FPR, capacity, speed) tradeoff space.
|
||||
14
doc/bloom/reference.adoc
Normal file
@@ -0,0 +1,14 @@
|
||||
[#reference]
|
||||
= Reference
|
||||
|
||||
include::reference/header_filter.adoc[]
|
||||
include::reference/filter.adoc[]
|
||||
include::reference/subfilters.adoc[]
|
||||
include::reference/header_block.adoc[]
|
||||
include::reference/block.adoc[]
|
||||
include::reference/header_multiblock.adoc[]
|
||||
include::reference/multiblock.adoc[]
|
||||
include::reference/header_fast_multiblock32.adoc[]
|
||||
include::reference/fast_multiblock32.adoc[]
|
||||
include::reference/header_fast_multiblock64.adoc[]
|
||||
include::reference/fast_multiblock64.adoc[]
|
||||
42
doc/bloom/reference/block.adoc
Normal file
@@ -0,0 +1,42 @@
|
||||
[#block]
|
||||
== Class Template `block`
|
||||
|
||||
:idprefix: block_
|
||||
|
||||
`boost::bloom::block` -- A xref:subfilter[subfilter] over an integral type.
|
||||
|
||||
=== Synopsis
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// #include <boost/bloom/block.hpp>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block, std::size_t K>
|
||||
struct block
|
||||
{
|
||||
static constexpr std::size_t k = K;
|
||||
using value_type = Block;
|
||||
|
||||
// the rest of the interface is not public
};
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
=== Description
|
||||
|
||||
*Template Parameters*
|
||||
|
||||
[cols="1,4"]
|
||||
|===
|
||||
|
||||
|`Block`
|
||||
|An unsigned integral type.
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
|
||||
|===
|
||||
52
doc/bloom/reference/fast_multiblock32.adoc
Normal file
@@ -0,0 +1,52 @@
|
||||
[#fast_multiblock32]
|
||||
== Class Template `fast_multiblock32`
|
||||
|
||||
:idprefix: fast_multiblock32_
|
||||
|
||||
`boost::bloom::fast_multiblock32` -- A faster replacement of
|
||||
`xref:multiblock[multiblock]<std::uint32_t, K>`.
|
||||
|
||||
=== Synopsis
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// #include <boost/bloom/fast_multiblock32.hpp>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<std::size_t K>
|
||||
struct fast_multiblock32
|
||||
{
|
||||
static constexpr std::size_t k = K;
|
||||
using value_type = _implementation-defined_;
|
||||
|
||||
// might not be present
|
||||
static constexpr std::size_t used_value_size = _implementation-defined_;
|
||||
|
||||
// the rest of the interface is not public
};
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
=== Description
|
||||
|
||||
*Template Parameters*
|
||||
|
||||
[cols="1,4"]
|
||||
|===
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
|
||||
|===
|
||||
|
||||
`fast_multiblock32<K>` is statistically equivalent to
|
||||
`xref:multiblock[multiblock]<std::uint32_t, K>`, but takes advantage
|
||||
of selected SIMD technologies, when available at compile time, to perform faster.
|
||||
Currently supported: AVX2, little-endian Neon, SSE2.
|
||||
The non-SIMD case falls back to regular `multiblock`.
|
||||
|
||||
`xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock32<K>>` is
|
||||
`4 * K`.
|
||||
52
doc/bloom/reference/fast_multiblock64.adoc
Normal file
@@ -0,0 +1,52 @@
|
||||
[#fast_multiblock64]
|
||||
== Class Template `fast_multiblock64`
|
||||
|
||||
:idprefix: fast_multiblock64_
|
||||
|
||||
`boost::bloom::fast_multiblock64` -- A faster replacement of
|
||||
`xref:multiblock[multiblock]<std::uint64_t, K>`.
|
||||
|
||||
=== Synopsis
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// #include <boost/bloom/fast_multiblock64.hpp>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<std::size_t K>
|
||||
struct fast_multiblock64
|
||||
{
|
||||
static constexpr std::size_t k = K;
|
||||
using value_type = _implementation-defined_;
|
||||
|
||||
// might not be present
|
||||
static constexpr std::size_t used_value_size = _implementation-defined_;
|
||||
|
||||
// the rest of the interface is not public
};
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
=== Description
|
||||
|
||||
*Template Parameters*
|
||||
|
||||
[cols="1,4"]
|
||||
|===
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
|
||||
|===
|
||||
|
||||
`fast_multiblock64<K>` is statistically equivalent to
|
||||
`xref:multiblock[multiblock]<std::uint64_t, K>`, but takes advantage
|
||||
of selected SIMD technologies, when available at compile time, to perform faster.
|
||||
Currently supported: AVX2.
|
||||
The non-SIMD case falls back to regular `multiblock`.
|
||||
|
||||
`xref:subfilters_used_value_size[_used-value-size_]<fast_multiblock64<K>>` is
|
||||
`8 * K`.
|
||||
711
doc/bloom/reference/filter.adoc
Normal file
@@ -0,0 +1,711 @@
|
||||
[#filter]
|
||||
== Class Template `filter`
|
||||
|
||||
:idprefix: filter_
|
||||
|
||||
`boost::bloom::filter` -- A data structure that supports element insertion
|
||||
and _probabilistic_ lookup, where an element can be determined to be in the filter
|
||||
with high confidence, or else determined not to be in it with absolute certainty. The probability
|
||||
that lookup erroneously classifies a non-present element as present is called
|
||||
the filter's _false positive rate_ (FPR).
|
||||
|
||||
`boost::bloom::filter` maintains an internal array of `m` bits where `m` is the
|
||||
filter's _capacity_. Unlike traditional containers, inserting an
|
||||
element `x` does not store a copy of `x` within the filter, but rather results
|
||||
in a fixed number of bits in the array being set to one, where the positions
|
||||
of the bits are pseudorandomly produced from the hash value of `x`. Lookup
|
||||
for `y` simply checks whether all the bits associated with `y` are actually set.
|
||||
|
||||
* For a given filter, the FPR increases as new elements are inserted.
|
||||
* For a given number of inserted elements, a filter with higher capacity
|
||||
has a lower FPR.
|
||||
|
||||
By convention, we say that a filter is _empty_ if its capacity is zero or
|
||||
all the bits in the internal array are set to zero.
|
||||
|
||||
=== Synopsis
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// #include <boost/bloom/filter.hpp>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
>
|
||||
class filter
|
||||
{
|
||||
public:
|
||||
// types and constants
|
||||
using value_type = T;
|
||||
static constexpr std::size_t k = K;
|
||||
using subfilter = Subfilter;
|
||||
static constexpr std::size_t xref:filter_bucket_size[bucket_size] = xref:filter_bucket_size[__see below__];
|
||||
using hasher = Hash;
|
||||
using allocator_type = Allocator;
|
||||
using size_type = std::size_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using reference = value_type&;
|
||||
using const_reference = const value_type&;
|
||||
using pointer = value_type*;
|
||||
using const_pointer = const value_type*;
|
||||
|
||||
// construct/copy/destroy
|
||||
xref:#filter_default_constructor[filter]();
|
||||
explicit xref:#filter_capacity_constructor[filter](
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
xref:#filter_capacity_constructor[filter](
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
template<typename InputIterator>
|
||||
xref:#filter_iterator_range_constructor[filter](
|
||||
InputIterator first, InputIterator last,
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
template<typename InputIterator>
|
||||
xref:#filter_iterator_range_constructor[filter](
|
||||
InputIterator first, InputIterator last,
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
xref:#filter_copy_constructor[filter](const filter& x);
|
||||
xref:#filter_move_constructor[filter](filter&& x);
|
||||
template<typename InputIterator>
|
||||
xref:#filter_iterator_range_constructor_with_allocator[filter](
|
||||
InputIterator first, InputIterator last,
|
||||
size_type m, const allocator_type& al);
|
||||
template<typename InputIterator>
|
||||
xref:#filter_iterator_range_constructor_with_allocator[filter](
|
||||
InputIterator first, InputIterator last,
|
||||
size_type n, double fpr, const allocator_type& al);
|
||||
explicit xref:#filter_allocator_constructor[filter](const allocator_type& al);
|
||||
xref:#filter_copy_constructor_with_allocator[filter](const filter& x, const allocator_type& al);
|
||||
xref:#filter_move_constructor_with_allocator[filter](filter&& x, const allocator_type& al);
|
||||
xref:#filter_initializer_list_constructor[filter](
|
||||
std::initializer_list<value_type> il,
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
xref:#filter_initializer_list_constructor[filter](
|
||||
std::initializer_list<value_type> il,
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
xref:#filter_capacity_constructor_with_allocator[filter](size_type m, const allocator_type& al);
|
||||
xref:#filter_capacity_constructor_with_allocator[filter](size_type n, double fpr, const allocator_type& al);
|
||||
xref:#filter_initializer_list_constructor_with_allocator[filter](
|
||||
std::initializer_list<value_type> il,
|
||||
size_type m, const allocator_type& al);
|
||||
xref:#filter_initializer_list_constructor_with_allocator[filter](
|
||||
std::initializer_list<value_type> il,
|
||||
size_type n, double fpr, const allocator_type& al);
|
||||
xref:#filter_destructor[~filter]();
|
||||
filter& xref:#filter_copy_assignment[operator+++=+++](const filter& x);
|
||||
filter& xref:#filter_move_assignment[operator+++=+++](filter&& x)
|
||||
noexcept(
|
||||
std::allocator_traits<Allocator>::is_always_equal::value ||
|
||||
std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
|
||||
filter& xref:#filter_initializer_list_assignment[operator+++=+++](std::initializer_list<value_type> il);
|
||||
allocator_type xref:#filter_get_allocator[get_allocator]() const noexcept;
|
||||
|
||||
// capacity
|
||||
size_type xref:#filter_capacity_2[capacity]() const noexcept;
|
||||
static size_type xref:#filter_capacity_estimation[capacity_for](size_type n, double fpr);
|
||||
static double xref:#filter_fpr_estimation[fpr_for](size_type n, size_type m);
|
||||
|
||||
// modifiers
|
||||
template<typename... Args>
|
||||
void xref:#filter_emplace[emplace](Args&&... args);
|
||||
void xref:#filter_insert[insert](const value_type& x);
|
||||
template<typename U>
|
||||
void xref:#filter_insert[insert](const U& x);
|
||||
template<typename InputIterator>
|
||||
void xref:#filter_insert_iterator_range[insert](InputIterator first, InputIterator last);
|
||||
void xref:#filter_insert_initializer_list[insert](std::initializer_list<value_type> il);
|
||||
|
||||
void xref:#filter_swap[swap](filter& x)
|
||||
noexcept(std::allocator_traits<Allocator>::is_always_equal::value ||
|
||||
std::allocator_traits<Allocator>::propagate_on_container_swap::value);
|
||||
void xref:#filter_clear[clear]() noexcept;
|
||||
void xref:#filter_reset[reset](size_type m = 0);
|
||||
void xref:#filter_reset[reset](size_type n, double fpr);
|
||||
|
||||
filter& xref:#filter_combine_with_and[operator&=](const filter& x);
|
||||
filter& xref:#filter_combine_with_or[operator|=](const filter& x);
|
||||
|
||||
// observers
|
||||
hasher xref:#filter_hash_function[hash_function]() const;
|
||||
|
||||
// lookup
|
||||
bool xref:#filter_may_contain[may_contain](const value_type& x) const;
|
||||
template<typename U>
|
||||
bool xref:#filter_may_contain[may_contain](const U& x) const;
|
||||
};
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
=== Description
|
||||
|
||||
*Template Parameters*
|
||||
|
||||
[cols="1,4"]
|
||||
|===
|
||||
|
||||
|`T`
|
||||
|The cv-unqualified object type of the elements inserted into the filter.
|
||||
|
||||
|`K`
|
||||
| Number of times the associated subfilter is invoked per element upon insertion or lookup.
|
||||
`K` must be greater than zero.
|
||||
|
||||
|`Subfilter`
|
||||
| A xref:subfilter[subfilter] type providing the exact algorithm for
|
||||
bit setting/checking into the filter's internal array. The subfilter is invoked `K` times
|
||||
per operation on `K` pseudorandomly selected portions of the array (_subarrays_) of width
|
||||
`xref:subfilters_used_value_size[_used-value-size_]<Subfilter>`.
|
||||
|
||||
|`BucketSize`
|
||||
| Distance in bytes between the initial positions of consecutive subarrays.
|
||||
If `BucketSize` is specified as zero, the actual distance is automatically selected to
|
||||
`_used-value-size_<Subfilter>` (non-overlapping subarrays).
|
||||
Otherwise, `BucketSize` must not be greater than `_used-value-size_<Subfilter>`.
|
||||
|
||||
|`Hash`
|
||||
|A https://en.cppreference.com/w/cpp/named_req/Hash[Hash^] type over `T`.
|
||||
|
||||
|`Allocator`
|
||||
|An https://en.cppreference.com/w/cpp/named_req/Allocator[Allocator^] whose value type is `T`.
|
||||
|
||||
|===
|
||||
|
||||
Allocation and deallocation of the internal array is done through an internal copy of the
|
||||
provided allocator. `value_type` construction/destruction (which only happens in
|
||||
`xref:filter_emplace[emplace]`) uses
|
||||
`std::allocator_traits<Allocator>::construct`/`destroy`.
|
||||
|
||||
If `link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]<Hash>::value`
|
||||
is `true` and `sizeof(std::size_t) >= 8`,
|
||||
the hash function is used as-is; otherwise, a bit-mixing post-processing stage
|
||||
is added to increase the quality of hashing at the expense of extra computational cost.
|
||||
|
||||
=== Types and Constants
|
||||
|
||||
[[filter_bucket_size]]
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
static constexpr std::size_t bucket_size;
|
||||
----
|
||||
|
||||
Equal to `BucketSize` if that parameter was specified as distinct from zero.
|
||||
Otherwise, equal to `xref:subfilters_used_value_size[_used-value-size_]<subfilter>`.
|
||||
|
||||
=== Constructors
|
||||
|
||||
==== Default Constructor
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter();
|
||||
----
|
||||
|
||||
Constructs an empty filter using `hasher()` as the hash function and
|
||||
`allocator_type()` as the allocator.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `hasher` and `allocator_type` must be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
|
||||
Postconditions:;; `capacity() == 0`.
|
||||
|
||||
==== Capacity Constructor
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
explicit filter(
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
filter(
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
----
|
||||
|
||||
Constructs an empty filter using copies of `h` and `al` as the hash function and allocator, respectively.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
|
||||
`capacity() == capacity_for(n, fpr)` (second overload).
|
||||
|
||||
==== Iterator Range Constructor
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<typename InputIterator>
|
||||
filter(
|
||||
InputIterator first, InputIterator last,
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
template<typename InputIterator>
|
||||
filter(
|
||||
InputIterator first, InputIterator last,
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
----
|
||||
|
||||
Constructs a filter using copies of `h` and `al` as the hash function and allocator, respectively,
|
||||
and inserts the values from `[first, last)` into it.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
|
||||
`[first, last)` is a valid range.
|
||||
Postconditions:;; `capacity() == 0` if `m == 0`, `capacity() >= m` otherwise (first overload). +
|
||||
`capacity() == capacity_for(n, fpr)` (second overload). +
|
||||
`may_contain(x)` for all values `x` from `[first, last)`.
|
||||
|
||||
==== Copy Constructor
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(const filter& x);
|
||||
----
|
||||
|
||||
Constructs a filter using copies of `x`++'++s internal array, `x.hash_function()`
|
||||
and `std::allocator_traits<Allocator>::select_on_container_copy_construction(x.get_allocator())`.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `*this == x`.
|
||||
|
||||
==== Move Constructor
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(filter&& x);
|
||||
----
|
||||
|
||||
Constructs a filter transferring `x`++'++s internal array to `*this` and using
|
||||
a hash function and allocator move-constructed from `x`++'++s hash function
|
||||
and allocator, respectively.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `x.capacity() == 0`.
|
||||
|
||||
==== Iterator Range Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<typename InputIterator>
|
||||
filter(
|
||||
InputIterator first, InputIterator last,
|
||||
size_type m, const allocator_type& al);
|
||||
template<typename InputIterator>
|
||||
filter(
|
||||
InputIterator first, InputIterator last,
|
||||
size_type n, double fpr, const allocator_type& al);
|
||||
----
|
||||
|
||||
Equivalent to `xref:#filter_iterator_range_constructor[filter](first, last, m, hasher(), al)` (first overload)
|
||||
or `xref:#filter_iterator_range_constructor[filter](first, last, n, fpr, hasher(), al)` (second overload).
|
||||
|
||||
==== Allocator Constructor
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
explicit filter(const allocator_type& al);
|
||||
----
|
||||
|
||||
Constructs an empty filter using `hasher()` as the hash function and
|
||||
a copy of `al` as the allocator.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `hasher` must be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
|
||||
Postconditions:;; `capacity() == 0`.
|
||||
|
||||
==== Copy Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(const filter& x, const allocator_type& al);
|
||||
----
|
||||
|
||||
Constructs a filter using copies of `x`++'++s internal array, `x.hash_function()`
|
||||
and `al`.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `*this == x`.
|
||||
|
||||
==== Move Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(filter&& x, const allocator_type& al);
|
||||
----
|
||||
|
||||
Constructs a filter transferring `x`++'++s internal array to `*this` if
|
||||
`al == x.get_allocator()`, or using a copy of the array otherwise.
|
||||
The hash function of the new filter is move-constructed from `x`++'++s
|
||||
hash function and the allocator is a copy of `al`.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `x.capacity() == 0`.
|
||||
|
||||
==== Initializer List Constructor
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(
|
||||
std::initializer_list<value_type> il,
|
||||
size_type m, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
filter(
|
||||
std::initializer_list<value_type> il,
|
||||
size_type n, double fpr, const hasher& h = hasher(),
|
||||
const allocator_type& al = allocator_type());
|
||||
----
|
||||
|
||||
Equivalent to `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), m, h, al)` (first overload)
|
||||
or `xref:#filter_iterator_range_constructor[filter](il.begin(), il.end(), n, fpr, h, al)` (second overload).
|
||||
|
||||
|
||||
==== Capacity Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(size_type m, const allocator_type& al);
|
||||
filter(size_type n, double fpr, const allocator_type& al);
|
||||
----
|
||||
|
||||
Equivalent to `xref:#filter_capacity_constructor[filter](m, hasher(), al)` (first overload)
|
||||
or `xref:#filter_capacity_constructor[filter](n, fpr, hasher(), al)` (second overload).
|
||||
|
||||
|
||||
==== Initializer List Constructor with Allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter(
|
||||
std::initializer_list<value_type> il,
|
||||
size_type m, const allocator_type& al);
|
||||
filter(
|
||||
std::initializer_list<value_type> il,
|
||||
size_type n, double fpr, const allocator_type& al);
|
||||
----
|
||||
|
||||
Equivalent to `xref:#filter_initializer_list_constructor[filter](il, m, hasher(), al)` (first overload)
|
||||
or `xref:#filter_initializer_list_constructor[filter](il, n, fpr, hasher(), al)` (second overload).
|
||||
|
||||
=== Destructor
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
~filter();
|
||||
----
|
||||
|
||||
Deallocates the internal array and destructs the internal hash function and allocator.
|
||||
|
||||
=== Assignment
|
||||
|
||||
==== Copy Assignment
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter& operator=(const filter& x);
|
||||
----
|
||||
|
||||
Let `pocca` be `std::allocator_traits<Allocator>::propagate_on_container_copy_assignment::value`.
|
||||
If `pocca`, replaces the internal allocator `al` with a copy of `x.get_allocator()`.
|
||||
If `capacity() != x.capacity()` or `pocca && al != x.get_allocator()`, replaces the internal array
|
||||
with a new one with capacity `x.capacity()`.
|
||||
Copies the values of `x`++'++s internal array.
|
||||
Replaces the internal hash function with a copy of `x.hash_function()`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; If `pocca`,
|
||||
`Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/CopyAssignable[CopyAssignable^]. +
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
Postconditions:;; `*this == x`.
|
||||
Returns:;; `*this`.
|
||||
|
||||
==== Move Assignment
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter& operator=(filter&& x)
|
||||
noexcept(
|
||||
std::allocator_traits<Allocator>::is_always_equal::value ||
|
||||
std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
|
||||
----
|
||||
|
||||
Let `pocma` be `std::allocator_traits<Allocator>::propagate_on_container_move_assignment::value`.
|
||||
If `pocma`, replaces the internal allocator with a copy of `x.get_allocator()`.
|
||||
If `get_allocator() == x.get_allocator()`, transfers `x`++'++s internal array to `*this`;
|
||||
otherwise, replaces the internal array with a new one with capacity `x.capacity()`
|
||||
and copies the values of `x`++'++s internal array.
|
||||
Replaces the internal hash function with a copy of `x.hash_function()`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; If `pocma`,
|
||||
`Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/CopyAssignable[CopyAssignable^]. +
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
Postconditions:;; `x.capacity() == 0`.
|
||||
Returns:;; `*this`.
|
||||
|
||||
==== Initializer List Assignment
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter& operator=(std::initializer_list<value_type> il);
|
||||
----
|
||||
|
||||
Clears the filter and inserts the values from `il`.
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `*this`.
|
||||
|
||||
=== Capacity
|
||||
|
||||
==== Capacity
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
size_type capacity() const noexcept;
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; The size in bits of the internal array.
|
||||
|
||||
==== Capacity Estimation
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
static size_type capacity_for(size_type n, double fpr);
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `fpr` is between 0.0 and 1.0.
|
||||
Postconditions:;; `filter(capacity_for(n, fpr)).capacity() == capacity_for(n, fpr)`. +
|
||||
`capacity_for(n, 1.0) == 0`.
|
||||
Returns:;; An estimation of the capacity required by a `filter` to attain a false positive rate
|
||||
equal to `fpr` when `n` distinct elements have been inserted.
|
||||
|
||||
==== FPR Estimation
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
static double fpr_for(size_type n, size_type m);
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `fpr_for(n, m)` is between 0.0 and 1.0. +
|
||||
`fpr_for(n, 0) == 1.0`. +
|
||||
`fpr_for(0, m) == 0.0` (if `m != 0`).
|
||||
Returns:;; An estimation of the resulting false positive rate when
|
||||
`n` distinct elements have been inserted into a `filter`
|
||||
with capacity `m`.
|
||||
|
||||
=== Modifiers
|
||||
|
||||
==== Emplace
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<typename... Args> void emplace(Args&&... args);
|
||||
----
|
||||
|
||||
Inserts an element constructed from `std::forward<Args>(args)+++...+++`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^]
|
||||
into `filter` from `std::forward<Args>(args)+++...+++`. +
|
||||
`value_type` is https://en.cppreference.com/w/cpp/named_req/Erasable[Erasable^] from `filter`.
|
||||
|
||||
==== Insert
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
void insert(const value_type& x);
|
||||
template<typename U> void insert(const U& x);
|
||||
----
|
||||
|
||||
If `capacity() != 0`, sets to one `k * subfilter::k` (not necessarily distinct)
|
||||
bits of the internal array deterministically selected from the value
|
||||
`hash_function()(x)`.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; `may_contain(x)`.
|
||||
Notes:;; The second overload only participates in overload resolution if
|
||||
`hasher::is_transparent` is a valid member typedef.
|
||||
|
||||
==== Insert Iterator Range
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<typename InputIterator>
|
||||
void insert(InputIterator first, InputIterator last);
|
||||
----
|
||||
|
||||
Equivalent to `while(first != last) xref:#filter_insert[insert](*first++)`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `InputIterator` is a https://en.cppreference.com/w/cpp/named_req/InputIterator[LegacyInputIterator^] referring to `value_type`. +
|
||||
`[first, last)` is a valid range.
|
||||
|
||||
==== Insert Initializer List
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
void insert(std::initializer_list<value_type> il);
|
||||
----
|
||||
|
||||
Equivalent to `xref:#filter_insert_iterator_range[insert](il.begin(), il.end())`.
|
||||
|
||||
==== Swap
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
void swap(filter& x)
|
||||
noexcept(std::allocator_traits<Allocator>::is_always_equal::value ||
|
||||
std::allocator_traits<Allocator>::propagate_on_container_swap::value);
|
||||
----
|
||||
|
||||
Let `pocs` be `std::allocator_traits<Allocator>::propagate_on_container_swap::value`.
|
||||
Swaps the internal array and hash function with those of `x`.
|
||||
If `pocs`, swaps the internal allocator with that of `x`.
|
||||
|
||||
[horizontal]
|
||||
Preconditions:;; `pocs || get_allocator() == x.get_allocator()`. +
|
||||
If `pocs`, `Allocator` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^]. +
|
||||
`hasher` is nothrow https://en.cppreference.com/w/cpp/named_req/Swappable[Swappable^].
|
||||
|
||||
|
||||
==== Clear
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
void clear() noexcept;
|
||||
----
|
||||
|
||||
Sets to zero all the bits in the internal array.
|
||||
|
||||
==== Reset
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
void reset(size_type m = 0);
|
||||
void reset(size_type n, double fpr);
|
||||
----
|
||||
|
||||
First overload: Replaces the internal array if the resulting capacity calculated from `m` is not
|
||||
equal to `capacity()`, and clears the filter. +
|
||||
Second overload: Equivalent to `reset(capacity_for(n, fpr))`.
|
||||
|
||||
[horizontal]
|
||||
Postconditions:;; In general, `capacity() >= m`. +
|
||||
If `m == 0` or `m == capacity()` or `m == capacity_for(n, fpr)` for some `n` and `fpr`, then `capacity() == m`.
|
||||
|
||||
==== Combine with AND
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter& operator&=(const filter& x);
|
||||
----
|
||||
|
||||
If `capacity() != x.capacity()`, throws a `std::invalid_argument` exception;
|
||||
otherwise, changes the value of each bit in the internal array with the result of
|
||||
doing a logical AND operation of that bit and the corresponding one in `x`.
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `*this`.
|
||||
|
||||
==== Combine with OR
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
filter& operator|=(const filter& x);
|
||||
----
|
||||
|
||||
If `capacity() != x.capacity()`, throws a `std::invalid_argument` exception;
|
||||
otherwise, changes the value of each bit in the internal array with the result of
|
||||
doing a logical OR operation of that bit and the corresponding one in `x`.
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `*this`;
|
||||
|
||||
=== Observers
|
||||
|
||||
==== get_allocator
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
allocator_type get_allocator() const noexcept;
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; A copy of the internal allocator.
|
||||
|
||||
==== hash_function
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
hasher hash_function() const;
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; A copy of the internal hash function.
|
||||
|
||||
=== Lookup
|
||||
|
||||
==== may_contain
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
bool may_contain(const value_type& x) const;
|
||||
template<typename U> bool may_contain(const U& x) const;
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `true` iff all the bits selected by a hypothetical
|
||||
`xref:filter_insert[insert](x)` operation are set to one.
|
||||
Notes:;; The second overload only participates in overload resolution if
|
||||
`hasher::is_transparent` is a valid member typedef.
|
||||
|
||||
=== Comparison
|
||||
|
||||
==== operator==
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
bool operator==(
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `true` iff `x.capacity() == y.capacity()` and
|
||||
`x`++'++s and `y`++'++s internal arrays are bitwise identical.
|
||||
|
||||
==== operator!=
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
bool operator!=(
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
----
|
||||
|
||||
[horizontal]
|
||||
Returns:;; `!(x xref:filter_operator[==] y)`.
|
||||
|
||||
|
||||
=== Swap
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
----
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
void swap(filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
|
||||
noexcept(noexcept(x.swap(y)));
|
||||
----
|
||||
|
||||
Equivalent to `x.xref:filter_swap[swap](y)`.
|
||||
17
doc/bloom/reference/header_block.adoc
Normal file
@@ -0,0 +1,17 @@
|
||||
[#header_block]
|
||||
== `<boost/bloom/block.hpp>`
|
||||
|
||||
:idprefix: header_block_
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block, std::size_t K>
|
||||
struct xref:block[block];
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
17
doc/bloom/reference/header_fast_multiblock32.adoc
Normal file
@@ -0,0 +1,17 @@
|
||||
[#header_fast_multiblock32]
|
||||
== `<boost/bloom/fast_multiblock32.hpp>`
|
||||
|
||||
:idprefix: header_fast_multiblock32_
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<std::size_t K>
|
||||
struct xref:fast_multiblock32[fast_multiblock32];
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
17
doc/bloom/reference/header_fast_multiblock64.adoc
Normal file
@@ -0,0 +1,17 @@
|
||||
[#header_fast_multiblock64]
|
||||
== `<boost/bloom/fast_multiblock64.hpp>`
|
||||
|
||||
:idprefix: header_fast_multiblock64_
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<std::size_t K>
|
||||
struct xref:fast_multiblock64[fast_multiblock64];
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
42
doc/bloom/reference/header_filter.adoc
Normal file
@@ -0,0 +1,42 @@
|
||||
[#header_filter]
|
||||
== `<boost/bloom/filter.hpp>`
|
||||
|
||||
:idprefix: header_filter_
|
||||
|
||||
Defines `xref:filter[boost::bloom::filter]`
|
||||
and associated functions.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
>
|
||||
class xref:filter[filter];
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
bool xref:filter_operator[operator+++==+++](
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
bool xref:filter_operator_2[operator!=](
|
||||
const filter<T, K, S, B, H, A>& x, const filter<T, K, S, B, H, A>& y);
|
||||
|
||||
template<
|
||||
typename T, std::size_t K, typename S, std::size_t B, typename H, typename A
|
||||
>
|
||||
void xref:filter_swap_2[swap](filter<T, K, S, B, H, A>& x, filter<T, K, S, B, H, A>& y)
|
||||
noexcept(noexcept(x.swap(y)));
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
17
doc/bloom/reference/header_multiblock.adoc
Normal file
@@ -0,0 +1,17 @@
|
||||
[#header_multiblock]
|
||||
== `<boost/bloom/multiblock.hpp>`
|
||||
|
||||
:idprefix: header_multiblock_
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block, std::size_t K>
|
||||
struct xref:multiblock[multiblock];
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
45
doc/bloom/reference/multiblock.adoc
Normal file
@@ -0,0 +1,45 @@
|
||||
[#multiblock]
|
||||
== Class Template `multiblock`
|
||||
|
||||
:idprefix: multiblock_
|
||||
|
||||
`boost::bloom::multiblock` -- A xref:subfilter[subfilter] over an array of an integral type.
|
||||
|
||||
=== Synopsis
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// #include <boost/bloom/multiblock.hpp>
|
||||
|
||||
namespace boost{
|
||||
namespace bloom{
|
||||
|
||||
template<typename Block, std::size_t K>
|
||||
struct multiblock
|
||||
{
|
||||
static constexpr std::size_t k = K;
|
||||
using value_type = Block[k];
|
||||
|
||||
// the rest of the interface is not public
};
|
||||
|
||||
} // namespace bloom
|
||||
} // namespace boost
|
||||
-----
|
||||
|
||||
=== Description
|
||||
|
||||
*Template Parameters*
|
||||
|
||||
[cols="1,4"]
|
||||
|===
|
||||
|
||||
|`Block`
|
||||
|An unsigned integral type.
|
||||
|
||||
|`K`
|
||||
| Number of bits set/checked per operation. Must be greater than zero.
|
||||
|
||||
|===
|
||||
|
||||
Each of the `K` bits set/checked is located in a different element of the
|
||||
`Block[K]` array.
|
||||
57
doc/bloom/reference/subfilters.adoc
Normal file
@@ -0,0 +1,57 @@
|
||||
[#subfilter]
|
||||
== Subfilters
|
||||
|
||||
:idprefix: subfilters_
|
||||
|
||||
A _subfilter_ implements a specific algorithm for bit setting (insertion) and
|
||||
bit checking (lookup) for `boost::bloom::filter`. Subfilters operate
|
||||
on portions of the filter's internal array called _subarrays_. The
|
||||
exact width of these subarrays is statically dependent on the subfilter type.
|
||||
|
||||
The full interface of a conforming subfilter is not exposed publicly, hence
|
||||
users can't provide their own subfilters and may only use those natively
|
||||
provided by the library. What follows is the publicly available interface.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
Subfilter::k
|
||||
-----
|
||||
|
||||
[horizontal]
|
||||
Result:;; A compile-time `std::size_t` value indicating
|
||||
the number of (not necessarily distinct) bits set/checked per operation.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
typename Subfilter::value_type
|
||||
-----
|
||||
|
||||
[horizontal]
|
||||
Result:;; A cv-unqualified,
|
||||
https://en.cppreference.com/w/cpp/named_req/TriviallyCopyable[TriviallyCopyable^]
|
||||
type to which the subfilter projects assigned subarrays.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
Subfilter::used_value_size
|
||||
-----
|
||||
|
||||
[horizontal]
|
||||
Result:;; A compile-time `std::size_t` value indicating
|
||||
the size of the effective portion of `Subfilter::value_type` used
|
||||
for bit setting/checking (assumed to begin at the lowest address in memory).
|
||||
Postconditions:;; Greater than zero and not greater than `sizeof(Subfilter::value_type)`.
|
||||
Notes:;; Optional.
|
||||
|
||||
=== _used-value-size_
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
template<typename Subfilter>
|
||||
constexpr std::size_t _used-value-size_; // exposition only
|
||||
-----
|
||||
|
||||
`_used-value-size_<Subfilter>` is `Subfilter::used_value_size` if this nested
|
||||
constant exists, or `sizeof(Subfilter::value_type)` otherwise.
|
||||
The value is the effective size in bytes of the subarrays upon which a
|
||||
given subfilter operates.
|
||||
9
doc/bloom/release_notes.adoc
Normal file
@@ -0,0 +1,9 @@
|
||||
[#release_notes]
|
||||
= Release Notes
|
||||
|
||||
:idprefix: release_notes_
|
||||
|
||||
== Boost 1.xx
|
||||
|
||||
* Initial release.
|
||||
|
||||
204
doc/bloom/tutorial.adoc
Normal file
@@ -0,0 +1,204 @@
|
||||
[#tutorial]
|
||||
= Tutorial
|
||||
|
||||
:idprefix: tutorial_
|
||||
|
||||
== Filter Definition
|
||||
|
||||
A `boost::bloom::filter` can be regarded as a bit array divided into _buckets_ that
|
||||
are selected pseudo-randomly (based on a hash function) upon insertion:
|
||||
each of the buckets is passed to a _subfilter_ that marks several of its bits according
|
||||
to some associated strategy.
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
template<
|
||||
typename T, std::size_t K,
|
||||
typename Subfilter = block<unsigned char, 1>, std::size_t BucketSize = 0,
|
||||
typename Hash = boost::hash<T>, typename Allocator = std::allocator<T>
|
||||
>
|
||||
class filter;
|
||||
-----
|
||||
|
||||
* `T`: Type of the elements inserted.
|
||||
* `K`: Number of buckets marked per insertion.
|
||||
* `xref:tutorial_subfilter[Subfilter]`: Type of subfilter used.
|
||||
* `xref:tutorial_bucketsize[BucketSize]`: Size in bytes of the buckets.
|
||||
* `xref:tutorial_hash[Hash]`: A hash function for `T`.
|
||||
* `Allocator`: An allocator for `T`.
|
||||
|
||||
=== `Subfilter`
|
||||
|
||||
The following subfilters can be selected, offering different compromises
|
||||
between performance and _false positive rate_ (FPR).
|
||||
See the xref:primer_variations_on_the_classical_filter[Bloom Filter Primer]
|
||||
for a general explanation of block and multiblock filters.
|
||||
|
||||
`block<Block, K'>`
|
||||
|
||||
[.indent]
|
||||
Sets `K'` bits in an underlying value of the unsigned integral type `Block`
|
||||
(e.g. `unsigned char`, `uint32_t`, `uint64_t`). So,
|
||||
a `filter<T, K, block<Block, K'>>` will set `K * K'` bits per element.
|
||||
The tradeoff here is that insertion/lookup will be (much) faster than
|
||||
with `filter<T, K * K'>` while the FPR will be worse (larger).
|
||||
FPR is better the wider `Block` is.
|
||||
|
||||
`multiblock<Block, K'>`
|
||||
|
||||
[.indent]
|
||||
Instead of setting `K'` bits in a `Block` value, this subfilter sets
|
||||
one bit on each of the elements of a `Block[K']` subarray. This improves FPR
|
||||
but impacts performance with respect to `block<Block, K'>`, among other
|
||||
things because cacheline boundaries can be crossed when accessing the subarray.
|
||||
|
||||
`fast_multiblock32<K'>`
|
||||
|
||||
[.indent]
|
||||
Statistically equivalent to `multiblock<uint32_t, K'>`, but uses
|
||||
faster SIMD-based algorithms when SSE2, AVX2 or Neon are available.
|
||||
|
||||
`fast_multiblock64<K'>`
|
||||
|
||||
[.indent]
|
||||
Statistically equivalent to `multiblock<uint64_t, K'>`, but uses a
|
||||
faster SIMD-based algorithm when AVX2 is available.
|
||||
|
||||
The default configuration with `block<unsigned char,1>` corresponds to a
|
||||
xref:primer[classical Bloom filter] setting `K` bits per element uniformly
|
||||
distributed across the array.
|
||||
|
||||
=== `BucketSize`
|
||||
|
||||
When the default value 0 is used, buckets have the same size as
|
||||
the _subarrays_ subfilters operate on (non-overlapping case).
|
||||
Otherwise, bucket size is smaller and subarrays spill over adjacent buckets,
|
||||
which results in an improved (lower) FPR in exchange for a possibly
|
||||
worse performance due to memory misalignment.
|
||||
|
||||
=== `Hash`
|
||||
|
||||
By default, link:../../../container_hash/index.html[Boost.ContainerHash] is used.
|
||||
Consult this library's link:../../../container_hash/doc/html/hash.html#user[dedicated section]
|
||||
if you need to extend `boost::hash` for your own types.
|
||||
|
||||
When the provided hash function is of sufficient quality, it is used
|
||||
as is; otherwise, a bit-mixing post-process is applied to hash values that improves
|
||||
their statistical properties so that the resulting FPR approaches its
|
||||
theoretical limit. The hash function is determined to be of high quality
|
||||
(more precisely, to have the so-called _avalanching_ property) via the
|
||||
`link:../../../unordered/doc/html/unordered/reference/hash_traits.html#hash_traits_hash_is_avalanching[boost::unordered::hash_is_avalanching]`
|
||||
trait.
|
||||
|
||||
== Capacity
|
||||
|
||||
The size of the filter's internal array is specified at construction time:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
using filter = boost::bloom::filter<std::string, ...>;
|
||||
filter f(1'000'000); // array of 1'000'000 **bits**
|
||||
std::cout << f.capacity(); // >= 1'000'000
|
||||
-----
|
||||
|
||||
Note that the default constructor of `boost::bloom::filter` specifies a capacity
|
||||
of zero, which in general won't be of much use -- the assigned array
|
||||
is null.
|
||||
|
||||
Instead of specifying the array's capacity directly, we can let the library
|
||||
figure it out based on the number of elements we plan to insert and the
|
||||
desired FPR:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// we'll insert 100'000 elements and want a FPR ~ 1%
|
||||
filter f(100'000, 0.01);
|
||||
|
||||
// this is equivalent
|
||||
filter f2(filter::capacity_for(100'000, 0.01));
|
||||
-----
|
||||
|
||||
Once a filter is constructed, its array is fixed (for instance, it won't
|
||||
grow dynamically as elements are inserted). The only way to change it is
|
||||
by assignment/swapping from a different filter, or using `reset`:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
f.reset(2'000'000); // changes to 2'000'000 bits **and clears the filter**
|
||||
f.reset(100'000, 0.005); // equivalent to reset(filter::capacity_for(100'000, 0.005));
|
||||
f.reset(); // null array (capacity == 0)
|
||||
-----
|
||||
|
||||
== Insertion and Lookup
|
||||
|
||||
Insertion is done in much the same way as with a traditional container:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
f.insert("hello");
|
||||
f.emplace(100, 'X'); // ~ insert(std::string(100, 'X'))
|
||||
f.insert(data.begin(), data.end());
|
||||
-----
|
||||
|
||||
Of course, in this context "insertion" does not involve any actual
|
||||
storage of elements into the filter, but rather the setting of bits in the
|
||||
internal array based on the hash values of those elements.
|
||||
Lookup goes as follows:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
bool b1 = f.may_contain("hello"); // b1 is true since we actually inserted "hello"
|
||||
bool b2 = f.may_contain("bye"); // b2 is most likely false
|
||||
-----
|
||||
|
||||
As its name suggests, `may_contain` can return `true` even if the
|
||||
element has not been previously inserted, that is, it may yield false
|
||||
positives -- this is the essence of probabilistic data structures.
|
||||
`fpr_for` provides an estimation of the false positive rate:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
// we have inserted 100 elements so far, what's our FPR?
|
||||
std::cout<< filter::fpr_for(100, f.capacity());
|
||||
-----
|
||||
|
||||
Note that in the example we provided the number 100 externally:
|
||||
`boost::bloom::filter` does not keep track of the number of elements
|
||||
that have been inserted -- in other words, it does not have a `size`
|
||||
operation.
|
||||
|
||||
Once inserted, there is no way to remove a specific element from the filter.
|
||||
We can only clear the filter entirely:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
f.clear(); // sets all the bits in the array to zero
|
||||
-----
|
||||
|
||||
== Filter Combination
|
||||
|
||||
`boost::bloom::filter`+++s+++ can be combined by computing the logical OR
|
||||
of the bits of their arrays:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
filter f2=...;
|
||||
...
|
||||
f|=f2; // f and f2 must have exactly the same capacity
|
||||
-----
|
||||
|
||||
The result is equivalent to a filter "containing" both the elements
|
||||
of `f` and `f2`. AND combination, on the other hand, results in a filter
|
||||
holding the _intersection_ of the elements:
|
||||
|
||||
[listing,subs="+macros,+quotes"]
|
||||
-----
|
||||
filter f3=...;
|
||||
...
|
||||
f&=f3; // f and f3 must have exactly the same capacity
|
||||
-----
|
||||
|
||||
For AND combination, be aware that the resulting FPR will be in general
|
||||
worse (higher) than if the filter had been constructed from scratch
|
||||
by inserting only the common elements -- don't trust `fpr_for` in this
|
||||
case.
|
||||
BIN
doc/img/block_insertion.png
Normal file
|
After Width: | Height: | Size: 4.6 KiB |
BIN
doc/img/block_multi_insertion.png
Normal file
|
After Width: | Height: | Size: 4.6 KiB |
BIN
doc/img/bloom_insertion.png
Normal file
|
After Width: | Height: | Size: 4.8 KiB |
BIN
doc/img/bloom_lookup.png
Normal file
|
After Width: | Height: | Size: 2.9 KiB |
BIN
doc/img/db_speedup.png
Normal file
|
After Width: | Height: | Size: 4.6 KiB |
BIN
doc/img/fpr_c.png
Normal file
|
After Width: | Height: | Size: 56 KiB |
BIN
doc/img/fpr_n_k.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
doc/img/fpr_n_k_bk.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
doc/img/multiblock_insertion.png
Normal file
|
After Width: | Height: | Size: 5.1 KiB |
@@ -32,6 +32,9 @@ struct block_base
|
||||
static constexpr std::size_t k=K;
|
||||
static constexpr std::size_t hash_width=sizeof(boost::uint64_t)*CHAR_BIT;
|
||||
static constexpr std::size_t block_width=sizeof(Block)*CHAR_BIT;
|
||||
static_assert(
|
||||
(block_width&(block_width-1))==0,
|
||||
"Block's size in bits must be a power of two");
|
||||
static constexpr std::size_t mask=block_width-1;
|
||||
static constexpr std::size_t shift=constexpr_bit_width(mask);
|
||||
static constexpr std::size_t rehash_k=(hash_width-shift)/shift;
|
||||
|
||||
@@ -60,9 +60,9 @@ namespace detail{
|
||||
#endif
|
||||
|
||||
/* mcg_and_fastrange produces (pos,hash') from hash, where
|
||||
* - x=mulx64(hash,range), mulx64 denotes extended multiplication
|
||||
* - pos=high(x)
|
||||
* - hash'=low(x)
|
||||
* - m=mulx64(hash,range), mulx64 denotes extended multiplication
|
||||
* - pos=high(m)
|
||||
* - hash'=low(m)
|
||||
* pos is uniformly distributed in [0,range) (see
|
||||
* https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
|
||||
* congruential generator of the form hash'<-hash*rng mod 2^64. This MCG
|
||||
@@ -100,20 +100,20 @@ struct mcg_and_fastrange
|
||||
boost::uint64_t rng;
|
||||
};
|
||||
|
||||
/* used_block_size<Subfilter>::value is Subfilter::used_value_size if it
|
||||
/* used_value_size<Subfilter>::value is Subfilter::used_value_size if it
|
||||
* exists, or sizeof(Subfilter::value_type) otherwise. This covers the
|
||||
* case where a subfilter only operates on the first bytes of its entire
|
||||
* value_type (e.g. fast_multiblock32<K> with K<8).
|
||||
*/
|
||||
|
||||
template<typename Subfilter,typename=void>
|
||||
struct used_block_size
|
||||
struct used_value_size
|
||||
{
|
||||
static constexpr std::size_t value=sizeof(typename Subfilter::value_type);
|
||||
};
|
||||
|
||||
template<typename Subfilter>
|
||||
struct used_block_size<
|
||||
struct used_value_size<
|
||||
Subfilter,
|
||||
typename std::enable_if<Subfilter::used_value_size!=0>::type
|
||||
>
|
||||
@@ -187,14 +187,14 @@ private:
|
||||
static constexpr std::size_t k_total=k*kp;
|
||||
using block_type=typename subfilter::value_type;
|
||||
static constexpr std::size_t block_size=sizeof(block_type);
|
||||
static constexpr std::size_t used_block_size=
|
||||
detail::used_block_size<subfilter>::value;
|
||||
static constexpr std::size_t used_value_size=
|
||||
detail::used_value_size<subfilter>::value;
|
||||
|
||||
public:
|
||||
static constexpr std::size_t bucket_size=
|
||||
BucketSize?BucketSize:used_block_size;
|
||||
BucketSize?BucketSize:used_value_size;
|
||||
static_assert(
|
||||
bucket_size<=used_block_size,"BucketSize can't exceed the block size");
|
||||
bucket_size<=used_value_size,"BucketSize can't exceed the block size");
|
||||
|
||||
private:
|
||||
static constexpr std::size_t tail_size=sizeof(block_type)-bucket_size;
|
||||
@@ -356,7 +356,7 @@ public:
|
||||
|
||||
static double fpr_for(std::size_t n,std::size_t m)
|
||||
{
|
||||
return n==0?0.0:m==0?1.0:fpr_for_c((double)m/n);
|
||||
return m==0?1.0:n==0?0.0:fpr_for_c((double)m/n);
|
||||
}
|
||||
|
||||
BOOST_FORCEINLINE void insert(boost::uint64_t hash)
|
||||
@@ -410,6 +410,11 @@ public:
|
||||
clear_bytes();
|
||||
}
|
||||
|
||||
void reset(std::size_t n,double fpr)
|
||||
{
|
||||
reset(capacity_for(n,fpr));
|
||||
}
|
||||
|
||||
filter_core& operator&=(const filter_core& x)
|
||||
{
|
||||
combine(x,[](unsigned char& a,unsigned char b){a&=b;});
|
||||
@@ -459,9 +464,9 @@ private:
|
||||
|
||||
static std::size_t requested_range(std::size_t m)
|
||||
{
|
||||
if(m>(used_block_size-bucket_size)*CHAR_BIT){
|
||||
if(m>(used_value_size-bucket_size)*CHAR_BIT){
|
||||
/* ensures filter_core{f.capacity()}.capacity()==f.capacity() */
|
||||
m-=(used_block_size-bucket_size)*CHAR_BIT;
|
||||
m-=(used_value_size-bucket_size)*CHAR_BIT;
|
||||
}
|
||||
return
|
||||
(std::numeric_limits<std::size_t>::max)()-m>=bucket_size*CHAR_BIT-1?
|
||||
@@ -530,7 +535,7 @@ private:
|
||||
|
||||
static std::size_t used_array_size(std::size_t rng)noexcept
|
||||
{
|
||||
return rng?rng*bucket_size+(used_block_size-bucket_size):0;
|
||||
return rng?rng*bucket_size+(used_value_size-bucket_size):0;
|
||||
}
|
||||
|
||||
static std::size_t unadjusted_capacity_for(std::size_t n,double fpr)
|
||||
@@ -539,7 +544,7 @@ private:
|
||||
using double_limits=std::numeric_limits<double>;
|
||||
|
||||
BOOST_ASSERT(fpr>=0.0&&fpr<=1.0);
|
||||
if(n==0)return 0;
|
||||
if(n==0)return fpr==1.0?0:1;
|
||||
|
||||
constexpr double eps=1.0/(double)(size_t_limits::max)();
|
||||
constexpr double max_size_t_as_double=
|
||||
@@ -593,7 +598,7 @@ private:
|
||||
|
||||
static double fpr_for_c(double c)
|
||||
{
|
||||
constexpr std::size_t w=(2*used_block_size-bucket_size)*CHAR_BIT;
|
||||
constexpr std::size_t w=(2*used_value_size-bucket_size)*CHAR_BIT;
|
||||
const double lambda=w*k/c;
|
||||
const double loglambda=std::log(lambda);
|
||||
double res=0.0;
|
||||
|
||||
@@ -146,7 +146,7 @@ public:
|
||||
const allocator_type& al=allocator_type()):
|
||||
super{m,al},hash_base{empty_init,h}{}
|
||||
|
||||
explicit filter(
|
||||
filter(
|
||||
std::size_t n,double fpr,const hasher& h=hasher(),
|
||||
const allocator_type& al=allocator_type()):
|
||||
super{n,fpr,al},hash_base{empty_init,h}{}
|
||||
|
||||
@@ -85,6 +85,14 @@ void test_capacity()
|
||||
BOOST_TEST_EQ(f.capacity(),0);
|
||||
BOOST_TEST(f==filter{});
|
||||
}
|
||||
{
|
||||
filter f{{fac(),fac()},1000};
|
||||
num_allocations=0;
|
||||
f.reset(0,1.0);
|
||||
BOOST_TEST_EQ(num_allocations,0);
|
||||
BOOST_TEST_EQ(f.capacity(),0);
|
||||
BOOST_TEST(f==filter{});
|
||||
}
|
||||
{
|
||||
filter f{{fac(),fac()},1000};
|
||||
std::size_t c=f.capacity();
|
||||
@@ -94,6 +102,14 @@ void test_capacity()
|
||||
BOOST_TEST_GE(f.capacity(),c+1);
|
||||
BOOST_TEST(f==filter{f.capacity()});
|
||||
}
|
||||
{
|
||||
filter f;
|
||||
std::size_t c=filter::capacity_for(100,0.1);
|
||||
num_allocations=0;
|
||||
f.reset(100,0.1);
|
||||
BOOST_TEST_EQ(num_allocations,1);
|
||||
BOOST_TEST_EQ(f.capacity(),c);
|
||||
}
|
||||
{
|
||||
filter f1{{fac(),fac()},1000},f2;
|
||||
std::size_t c=f1.capacity();
|
||||
|
||||
@@ -64,7 +64,9 @@ void test_fpr()
|
||||
boost::hash<std::string>
|
||||
>;
|
||||
|
||||
BOOST_TEST_EQ(filter(0,0.01).capacity(),0);
|
||||
BOOST_TEST_GT(filter(0,0.0).capacity(),0);
|
||||
BOOST_TEST_GT(filter(0,0.5).capacity(),0);
|
||||
BOOST_TEST_EQ(filter(0,1.0).capacity(),0);
|
||||
BOOST_TEST_THROWS((void)filter(1,0.0),std::bad_alloc);
|
||||
BOOST_TEST_EQ(filter(100,1.0).capacity(),0);
|
||||
|
||||
@@ -82,7 +84,7 @@ void test_fpr()
|
||||
}
|
||||
|
||||
BOOST_TEST_EQ(filter::fpr_for(0,1),0.0);
|
||||
BOOST_TEST_EQ(filter::fpr_for(0,0),0.0);
|
||||
BOOST_TEST_EQ(filter::fpr_for(0,0),1.0);
|
||||
BOOST_TEST_EQ(filter::fpr_for(1,0),1.0);
|
||||
|
||||
{
|
||||
|
||||