Add bsm URL handling and whitepaper email capture (#1957)

This commit is contained in:
Greg Kaleka
2025-10-10 12:15:21 -04:00
committed by GitHub
parent 2b392f8538
commit 83e6bc45f5
17 changed files with 609 additions and 113 deletions

View File

@@ -96,6 +96,7 @@ INSTALLED_APPS += [
"versions",
"libraries",
"mailing_list",
"marketing",
"news",
"reports",
"core",

View File

@@ -33,8 +33,8 @@ from core.views import (
UserGuideTemplateView,
BoostDevelopmentView,
ModernizedDocsView,
QRCodeView,
)
from marketing.views import PlausibleRedirectView, WhitePaperView
from libraries.api import LibrarySearchView
from libraries.views import (
LibraryDetail,
@@ -122,13 +122,30 @@ urlpatterns = (
path("feed/news.atom", AtomNewsFeed(), name="news_feed_atom"),
path("LICENSE_1_0.txt", BSLView, name="license"),
path(
"qrc/<str:campaign_identifier>/", QRCodeView.as_view(), name="qr_code_root"
"qrc/<str:campaign_identifier>/",
PlausibleRedirectView.as_view(),
name="qr_code_root",
), # just in case
path(
"qrc/<str:campaign_identifier>/<path:main_path>",
QRCodeView.as_view(),
PlausibleRedirectView.as_view(),
name="qr_code",
),
path(
"bsm/<str:campaign_identifier>/",
PlausibleRedirectView.as_view(),
name="bsm_root",
),
path(
"bsm/<str:campaign_identifier>/<path:main_path>",
PlausibleRedirectView.as_view(),
name="bsm",
),
path(
"outreach/<slug:category>/<slug:slug>",
WhitePaperView.as_view(),
name="whitepaper",
),
path(
"accounts/social/signup/",
CustomSocialSignupViewView.as_view(),

View File

@@ -1,4 +1,3 @@
import logging
from unittest.mock import patch
import pytest
@@ -357,53 +356,3 @@ def test_docs_libs_gateway_200_html_transformed(rf, tp, mock_get_file_data):
def test_calendar(rf, tp):
response = tp.get("calendar")
tp.response_200(response)
def test_qrc_redirect_and_plausible_payload(tp):
"""XFF present; querystring preserved; payload/headers correct."""
with patch("core.views.requests.post", return_value=None) as post_mock:
url = "/qrc/pv-01/library/latest/beast/?x=1&y=2"
res = tp.get(url)
tp.response_302(res)
assert res["Location"] == "/library/latest/beast/?x=1&y=2"
# Plausible call
(endpoint,), kwargs = post_mock.call_args
assert endpoint == "https://plausible.io/api/event"
# View uses request.path, so no querystring in payload URL
assert kwargs["json"] == {
"name": "pageview",
"domain": "qrc.boost.org",
"url": "http://testserver/qrc/pv-01/library/latest/beast/",
"referrer": "", # matches view behavior with no forwarded referer
}
headers = kwargs["headers"]
assert headers["Content-Type"] == "application/json"
assert kwargs["timeout"] == 2.0
def test_qrc_falls_back_to_remote_addr_when_no_xff(tp):
"""No XFF provided -> uses REMOTE_ADDR (127.0.0.1 in Django test client)."""
with patch("core.views.requests.post", return_value=None) as post_mock:
res = tp.get("/qrc/camp/library/latest/algorithm/")
tp.response_302(res)
assert res["Location"] == "/library/latest/algorithm/"
(_, kwargs) = post_mock.call_args
headers = kwargs["headers"]
assert headers["X-Forwarded-For"] == "127.0.0.1" # Django test client default
def test_qrc_logs_plausible_error_but_still_redirects(tp, caplog):
"""Plausible post raises -> error logged; redirect not interrupted."""
with patch("core.views.requests.post", side_effect=RuntimeError("boom")):
with caplog.at_level(logging.ERROR, logger="core.views"):
res = tp.get("/qrc/c1/library/", HTTP_USER_AGENT="ua")
tp.response_302(res)
assert res["Location"] == "/library/"
assert any("Plausible event post failed" in r.message for r in caplog.records)

View File

@@ -3,7 +3,6 @@ from django.utils import timezone
from urllib.parse import urljoin
import requests
import structlog
from bs4 import BeautifulSoup
import chardet
@@ -16,14 +15,11 @@ from django.http import (
HttpResponse,
HttpResponseNotFound,
HttpResponseRedirect,
HttpRequest,
)
from django.shortcuts import redirect
from django.template.loader import render_to_string
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views import View
from django.views.decorators.cache import never_cache
from django.views.generic import TemplateView
from config.settings import ENABLE_DB_CACHE
@@ -915,57 +911,3 @@ class RedirectToLibrariesView(BaseRedirectView):
if requested_version == "release":
new_path = "/libraries/"
return HttpResponseRedirect(new_path)
@method_decorator(never_cache, name="dispatch")
class QRCodeView(View):
"""Handles QR code urls, sending them to Plausible, then redirecting to the desired url.
QR code urls are formatted /qrc/<campaign_identifier>/desired/path/to/content/, and will
result in a redirect to /desired/path/to/content/.
E.g. https://www.boost.org/qrc/pv-01/library/latest/beast/ will send this full url to Plausible,
then redirect to https://www.boost.org/library/latest/beast/
"""
def get(self, request: HttpRequest, campaign_identifier: str, main_path: str = ""):
absolute_url = request.build_absolute_uri(request.path)
referrer = request.META.get("HTTP_REFERER", "")
user_agent = request.META.get("HTTP_USER_AGENT", "")
plausible_payload = {
"name": "pageview",
"domain": "qrc.boost.org",
"url": absolute_url,
"referrer": referrer,
}
headers = {"Content-Type": "application/json", "User-Agent": user_agent}
client_ip = request.META.get("HTTP_X_FORWARDED_FOR", "").split(",")[0].strip()
client_ip = client_ip or request.META.get("REMOTE_ADDR")
if client_ip:
headers["X-Forwarded-For"] = client_ip
try:
requests.post(
"https://plausible.io/api/event",
json=plausible_payload,
headers=headers,
timeout=2.0,
)
except Exception as e:
# Don't interrupt the redirect - just log it
logger.error(f"Plausible event post failed: {e}")
# Now that we've sent the request url to plausible, we can redirect to the main_path
# Preserve the original querystring, if any.
# Example: /qrc/3/library/latest/algorithm/?x=1 -> /library/latest/algorithm/?x=1
# `main_path` is everything after qrc/<campaign>/ thanks to <path:main_path>.
redirect_path = "/" + main_path if main_path else "/"
qs = request.META.get("QUERY_STRING")
if qs:
redirect_path = f"{redirect_path}?{qs}"
return HttpResponseRedirect(redirect_path)

View File

@@ -141,5 +141,5 @@ alias shell := console
docker compose cp "db:/tmp/${DUMP_FILENAME}" "./${DUMP_FILENAME}"
echo "Database dumped successfully to ${DUMP_FILENAME}"
@manage args:
@manage +args:
docker compose run --rm web python manage.py {{ args }}

0
marketing/__init__.py Normal file
View File

9
marketing/admin.py Normal file
View File

@@ -0,0 +1,9 @@
from django.contrib import admin
from marketing.models import CapturedEmail
@admin.register(CapturedEmail)
class CapturedEmailAdmin(admin.ModelAdmin):
model = CapturedEmail
list_display = ("email", "referrer", "page_slug")

6
marketing/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class MarketingConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "marketing"

17
marketing/forms.py Normal file
View File

@@ -0,0 +1,17 @@
from django import forms
from .models import CapturedEmail
class CapturedEmailForm(forms.ModelForm):
class Meta:
model = CapturedEmail
fields = ["email"]
widgets = {
"email": forms.EmailInput(
attrs={
"placeholder": "your@email.com",
"autocomplete": "email",
}
)
}

View File

@@ -0,0 +1,30 @@
# Generated by Django 4.2.24 on 2025-10-08 18:18
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = []
operations = [
migrations.CreateModel(
name="CapturedEmail",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("email", models.EmailField(max_length=254)),
("referrer", models.CharField(blank=True, default="")),
("page_slug", models.CharField(blank=True, default="")),
],
),
]

View File

13
marketing/models.py Normal file
View File

@@ -0,0 +1,13 @@
from django.db import models
class CapturedEmail(models.Model):
email = models.EmailField()
referrer = models.CharField(blank=True, default="")
page_slug = models.CharField(blank=True, default="")
def __str__(self):
return self.email
def __repr__(self):
return f"<{self.__class__.__name__} ({self.pk}): {self}>"

59
marketing/tests.py Normal file
View File

@@ -0,0 +1,59 @@
import logging
from unittest.mock import patch
import pytest
def test_whitepaper_view(tp):
# the "whitepaper" route needs both kwargs; values assume the bundled example template at marketing/whitepapers/_example/_example.html
tp.assertGoodView("whitepaper", category="_example", slug="_example")
@pytest.mark.parametrize("url_stem", ["qrc", "bsm"])
def test_plausible_redirect_and_plausible_payload(tp, url_stem):
"""XFF present; querystring preserved; payload/headers correct."""
with patch("marketing.views.requests.post", return_value=None) as post_mock:
url = f"/{url_stem}/pv-01/library/latest/beast/?x=1&y=2"
res = tp.get(url)
tp.response_302(res)
assert res["Location"] == "/library/latest/beast/?x=1&y=2"
# Plausible call
(endpoint,), kwargs = post_mock.call_args
assert endpoint == "https://plausible.io/api/event"
# View uses request.path, so no querystring in payload URL
assert kwargs["json"] == {
"name": "pageview",
"domain": "qrc.boost.org",
"url": f"http://testserver/{url_stem}/pv-01/library/latest/beast/",
"referrer": "", # matches view behavior with no forwarded referer
}
headers = kwargs["headers"]
assert headers["Content-Type"] == "application/json"
assert kwargs["timeout"] == 2.0
def test_qrc_falls_back_to_remote_addr_when_no_xff(tp):
"""No XFF provided -> uses REMOTE_ADDR (127.0.0.1 in Django test client)."""
with patch("marketing.views.requests.post", return_value=None) as post_mock:
res = tp.get("/qrc/camp/library/latest/algorithm/")
tp.response_302(res)
assert res["Location"] == "/library/latest/algorithm/"
(_, kwargs) = post_mock.call_args
headers = kwargs["headers"]
assert headers["X-Forwarded-For"] == "127.0.0.1" # Django test client default
def test_qrc_logs_plausible_error_but_still_redirects(tp, caplog):
"""Plausible post raises -> error logged; redirect not interrupted."""
with patch("marketing.views.requests.post", side_effect=RuntimeError("boom")):
with caplog.at_level(logging.ERROR, logger="core.views"):
res = tp.get("/qrc/c1/library/", HTTP_USER_AGENT="ua")
tp.response_302(res)
assert res["Location"] == "/library/"
assert any("Plausible event post failed" in r.message for r in caplog.records)

103
marketing/views.py Normal file
View File

@@ -0,0 +1,103 @@
import requests
from django.contrib.messages.views import SuccessMessageMixin
from django.http import HttpRequest, HttpResponseRedirect
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views import View
from django.views.decorators.cache import never_cache
from django.views.generic import CreateView
from core.views import logger
from marketing.forms import CapturedEmailForm
from marketing.models import CapturedEmail
@method_decorator(never_cache, name="dispatch")
class PlausibleRedirectView(View):
"""Handles QR code and social media urls, sending them to Plausible, then redirecting to the desired url.
QR code urls are formatted /qrc/<campaign_identifier>/desired/path/to/content/, and will
result in a redirect to /desired/path/to/content/.
Social media urls are formatted /bsm/<campaign_identifier>/desired/path/to/content/, and will
result in a redirect to /desired/path/to/content/.
E.g. https://www.boost.org/qrc/pv-01/library/latest/beast/ will send this full url to Plausible,
then redirect to https://www.boost.org/library/latest/beast/
"""
def get(self, request: HttpRequest, campaign_identifier: str, main_path: str = ""):
absolute_url = request.build_absolute_uri(request.path)
referrer = request.META.get("HTTP_REFERER", "")
print(f"\n\n{referrer = }\n")
user_agent = request.META.get("HTTP_USER_AGENT", "")
plausible_payload = {
"name": "pageview",
"domain": "qrc.boost.org",
"url": absolute_url,
"referrer": referrer,
}
headers = {"Content-Type": "application/json", "User-Agent": user_agent}
client_ip = request.META.get("HTTP_X_FORWARDED_FOR", "").split(",")[0].strip()
client_ip = client_ip or request.META.get("REMOTE_ADDR")
if client_ip:
headers["X-Forwarded-For"] = client_ip
try:
requests.post(
"https://plausible.io/api/event",
json=plausible_payload,
headers=headers,
timeout=2.0,
)
except Exception as e:
# Don't interrupt the redirect - just log it
logger.error(f"Plausible event post failed: {e}")
# Now that we've sent the request url to plausible, we can redirect to the main_path
# Preserve the original querystring, if any.
# Example: /qrc/3/library/latest/algorithm/?x=1 -> /library/latest/algorithm/?x=1
# `main_path` is everything after qrc/<campaign>/ thanks to <path:main_path>.
redirect_path = "/" + main_path if main_path else "/"
qs = request.META.get("QUERY_STRING")
if qs:
redirect_path = f"{redirect_path}?{qs}"
request.session["original_referrer"] = referrer or campaign_identifier
return HttpResponseRedirect(redirect_path)
class WhitePaperView(SuccessMessageMixin, CreateView):
"""Email capture and whitepaper view."""
model = CapturedEmail
form_class = CapturedEmailForm
success_message = "Thanks! We'll be in touch."
referrer = ""
def get(self, request, *args, **kwargs):
"""Store self.referrer for use in form submission."""
# If this view originated from PlausibleRedirectView, we should have original_referrer in the session
if original_referrer := self.request.session.pop("original_referrer", ""):
self.referrer = original_referrer
else:
self.referrer = self.request.META.get("HTTP_REFERER", "")
return super().get(request, *args, **kwargs)
def get_template_names(self):
category = self.kwargs["category"]
slug = self.kwargs["slug"]
return [f"marketing/whitepapers/{category}/{slug}.html"]
def form_valid(self, form):
form.instance.referrer = self.referrer
form.instance.page_slug = f"{self.kwargs['category']}/{self.kwargs['slug']}"
return super().form_valid(form)
def get_success_url(self):
return reverse("whitepaper", kwargs=self.kwargs)

View File

@@ -0,0 +1,42 @@
:root {
/*color-scheme: light dark;*/
}
html {
margin:0;
padding:0;
}
body {
/* background-color: light-dark(white, #212121);
color: light-dark(black, white);*/
margin:auto;
max-width: 41rem;
font-family: Arial, Helvetica, sans-serif;
padding:2rem;
font-size: 0.88rem;
}
input {display:inline-block;padding:5px;border:1px solid #ccc;}
hr {margin:2rem auto; width: 150px;}
p {font-size: 0.88rem; padding-top:0.7rem;}
a {color:#0284c7;}
a:hover, a:active {color:darkblue}
a:visited {color:#0284c7;}
h1 {font-size: 1.44rem; font-weight:700; padding-top:1rem; padding-bottom:0.3rem}
h2 {font-size: 1rem; font-weight:600; text-transform: uppercase; padding-top:1rem; padding-bottom:0.6rem}
h3 {font-size: 1.1rem; font-weight:700; padding-top:0.6rem;}
h4 {font-size: 0.95rem; font-weight:700;padding-top:0.6rem;}
h5 {font-size: 0.69rem; font-weight:700;}
h6 {}
ul {}
ul > li {padding-top:0.5rem;}
ol {}
ol > li {padding-top:0.5rem;}
code,pre {font-family:'Courier New', Courier, monospace; font-size: 0.88rem; display:inline; color:green;}
.section {}
.inset {padding-left:50px}
.logo {background-image:url("/static/img/Boost_Symbol_Transparent.svg"); background-position:center;width:80px; height:80px; display:inline-block;background-repeat: no-repeat;margin-right: 1rem;}
.flex {display:flex; align-items:center;}
.email-block {display:flex; align-items:center;margin:auto; justify-content:center;}
.email-button {margin-left:1rem; background-color:#0284c7; color:white; font-weight:bold; padding:5px 10px; border:0;}
.email-button:hover {background-color:#006394}

View File

@@ -0,0 +1,154 @@
{% load static %}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="{% static 'css/landing-style.css' %}" />
</head>
<body>
<h1>Drop your email below to get engineering updates. Then scroll down for the Unordered tech overview & links.</h1>
{% if messages %}
{# Expect a success message from submitting the form #}
<div class="email-block">
<div class="messages">
{% for message in messages %}
<p{% if message.tags %} class="{{ message.tags }}"{% endif %}>{{ message }}</p>
{% endfor %}
</div>
</div>
{% else %}
<div class="email-block">
<form method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.email.errors }}
{{ form.email }}
<button class="email-button" type="submit">Get C++ updates</button>
</form>
</div>
<p>Privacy: no spam, one-step unsubscribe. We'll only send high-signal dev content about Unordered and other Boost libraries.</p>
{% endif %}
<hr/>
<h2>TECH OVERVIEW</h2>
<h1 class="flex"><div class="logo"></div>Boost.Unordered: High-Performance Hash Containers for C++</h1>
<h3>Understanding the Container Options</h3>
<p>Boost.Unordered gives you 12 different hash container types to choose from, organized into three main families. Think of these as tools in your performance toolbox—each one is optimized for different situations.</p>
<p><b>I. Closed-addressing containers</b> (like <code>boost::unordered_map</code> and <code>boost::unordered_set</code>) work exactly like <code>std::unordered</code> containers. You can drop them into existing code as faster replacements. They support C++11 and newer standards.</p>
<p><b>II. Open-addressing containers are the speed champions. </b><code>boost::unordered_flat_map</code> and <code>boost::unordered_flat_set</code> store elements directly in the bucket array for maximum performance. If you need pointer stability (addresses that don't change), use <code>boost::unordered_node_map</code> and <code>boost::unordered_node_set</code> instead—they're slightly slower but still very fast.</p>
<p><b>III. Concurrent containers</b> like <code>boost::concurrent_flat_map</code> and <code>boost::concurrent_flat_set</code> are designed for multithreaded programs where multiple threads need to access the same container safely.</p>
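<p>To make the menu concrete, here is a minimal sketch declaring one container from each family; header paths follow the Boost.Unordered documentation:</p>
<pre><code>#include &lt;boost/unordered/unordered_map.hpp&gt;        // closed addressing
#include &lt;boost/unordered/unordered_flat_map.hpp&gt;   // open addressing
#include &lt;boost/unordered/concurrent_flat_map.hpp&gt;  // concurrent
#include &lt;string&gt;

// Drop-in std::unordered_map replacement (same API, pointer stability):
boost::unordered_map&lt;std::string, int&gt; classic;

// Fastest single-threaded option; elements live inline in the bucket array:
boost::unordered_flat_map&lt;std::string, int&gt; flat;

// Thread-safe variant built on the same flat layout:
boost::concurrent_flat_map&lt;std::string, int&gt; shared;</code></pre>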
<h3>I. Closed-Addressing Containers: How boost::unordered_map Got So Fast</h3>
<h4>The Problem with Standard Implementations</h4>
<p>Back in 2003, when C++ standardized hash tables, the committee chose "closed addressing" (also called separate chaining) because open addressing wasn't mature yet. This decision became baked into the standard through requirements like the bucket API, pointer stability, and user-controllable load factors.</p>
<p>The standard also required that iterator increment be constant time and erase be constant time on average. These requirements forced standard libraries to use complicated workarounds that made their implementations slower.</p>
<p>For example, libstdc++ and libc++ link all nodes together across the entire container. To make this work, buckets point to the node before the first one in the bucket (not the first one itself), and each node stores its hash value. These extra pointers and stored hash values waste memory and slow things down.</p>
<h4>Boost's Solution (Released August 2022)</h4>
<p>Boost.Unordered 1.80 went back to basics. Nodes are only linked within each bucket, not across the whole container. This makes deletion trivial—just remove the node from its bucket's list.</p>
<p>For iteration, Boost introduced <b>bucket groups</b>. Each group has a 32/64-bit mask showing which buckets are occupied, plus pointers linking groups together. To iterate, you use fast bit operations on the masks and jump between groups using the pointers. This takes only 4 bits per bucket and is very fast.</p>
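<p>The sketch below shows the bit trick behind that iteration (a hypothetical helper using C++20 <code>&lt;bit&gt;</code>, not Boost's internals): repeatedly extract and clear the lowest set bit, so only occupied buckets are ever touched.</p>
<pre><code>#include &lt;bit&gt;      // std::countr_zero (C++20)
#include &lt;cstdint&gt;

// Visit every occupied bucket in one group. Bit i of `mask` is set
// iff bucket i is occupied.
template &lt;class Visit&gt;
void for_each_occupied(std::uint64_t mask, Visit visit) {
    while (mask != 0) {
        int i = std::countr_zero(mask);  // index of lowest occupied bucket
        visit(i);
        mask &amp;= mask - 1;                // clear that bit and continue
    }
}</code></pre>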
<h4>Fast Modulo Magic</h4>
<p>Hash tables need to map hash values to bucket positions. Traditional approaches use either expensive modulo operations with prime numbers, or power-of-two sizes with bit masking.</p>
<p>Boost uses prime numbers (for better distribution) but uses them in combination with Daniel Lemire's "fastmod" technique—it's as fast as power-of-two bit operations but gives you the better distribution of prime numbers. Even better, eliminating function pointer tables allows the compiler to inline code for extra speed.</p>
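<p>For the curious, this is the published fastmod recipe for 32-bit values (a sketch of Lemire's technique, not Boost's actual code; <code>__uint128_t</code> is a GCC/Clang extension):</p>
<pre><code>#include &lt;cstdint&gt;

// Precompute once per bucket count d: M = ceil(2^64 / d).
uint64_t compute_M(uint32_t d) {
    return UINT64_C(0xFFFFFFFFFFFFFFFF) / d + 1;
}

// n % d using two multiplications instead of a hardware division.
uint32_t fastmod(uint32_t n, uint64_t M, uint32_t d) {
    uint64_t lowbits = M * n;  // fractional part of n/d, scaled by 2^64
    return (uint32_t)(((__uint128_t)lowbits * d) &gt;&gt; 64);
}</code></pre>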
<h4>Real-World Speed Test: FastNetMon DDoS Detection</h4>
<p>Pavel Odintsov, who runs one of the fastest DDoS detection products on the market (FastNetMon), tested the performance improvements using actual network traffic from a large ISP. The test used 131,000 unique IP addresses with real access patterns—not synthetic benchmark data.</p>
<p>Testing on an AMD Ryzen 5 3600 with gcc 12.1 showed the Boost 1.80 <code>boost::unordered_map</code> achieved 32.4M ops/sec vs <code>std::unordered_map</code>'s 25.3M ops/sec.</p>
<p>That's a <b>28% speed improvement</b> in real-world DDoS detection workloads. When you're trying to detect network attacks in under a second, this kind of performance gain matters.</p>
<h4>Why It's Faster</h4>
<p>Boost's improved layout uses less memory: only 12N + 0.5B bytes of overhead per element (on 64-bit systems) compared to libstdc++'s 16N + 8B bytes. Less memory means better cache performance. Combine that with fast modulo and one less pointer indirection per lookup, and you get substantial real-world speedups.</p>
<h3>II. Open-Addressing Containers: When to Use boost::unordered_flat_map</h3>
<p>Starting in Boost 1.81 (December 2022), Boost added <code>boost::unordered_flat_map</code> and <code>boost::unordered_flat_set</code>—containers that break some C++ standard requirements in exchange for much better performance. By 2022, open addressing had clearly won the performance race.</p>
<h4>Choose boost::unordered_flat_map when:</h4>
<ul>
<li>Speed is your top priority</li>
<li>Your types support move construction (most do)</li>
<li>You're using good hash functions (or Boost's defaults)</li>
</ul>
<h4>Stick with boost::unordered_map when:</h4>
<ul><li>You need exact <code>std::unordered_map</code> compatibility</li>
<li>You need pointer stability (pointers to elements that never change)</li>
<li>You're using multimap or multiset variants</li>
<li>Your hash functions aren't great</li>
</ul>
<h4>Why Open Addressing Is Fast</h4>
<p><b>Cache-friendly</b> design: Elements live directly in the bucket array, not scattered in separate nodes. Modern CPUs love this because:</p>
<ul>
<li>No pointer indirection—the bucket position is the element position</li>
<li>Contiguous memory layout means better cache utilization</li>
</ul>
<p><b>The collision problem:</b> When two elements hash to the same bucket, open addressing uses a "probing sequence" to find an empty bucket nearby. Boost uses a non-relocating approach (elements stay where they're inserted) to behave more like <code>std::unordered_map</code>.</p>
<p>The main challenge: when you delete an element, you can't just mark its bucket as empty—that would break lookups for elements stored further along the probing sequence. Traditional solutions use "tombstones" (markers that say "something was here"), but those slow down lookups over time.</p>
<h4>SIMD: Checking Multiple Buckets at Once</h4>
<p>SIMD stands for "Single Instruction, Multiple Data"—CPU instructions that process multiple values in parallel. Originally designed for video processing, SIMD was later recognized by hash table implementers as a way to speed up lookups.</p>
<p> <b>The basic idea:</b> Instead of storing just elements, also maintain a metadata array with one byte per bucket. Each metadata byte holds a "reduced hash value"—a shortened version of the element's hash. When looking up an element, SIMD instructions can check 16 metadata bytes simultaneously to find potential matches, then only do the expensive full comparison on actual candidates.</p>
<p> This technique checks 16 buckets in constant time. Google's Abseil and Meta's F14 containers pioneered this approach.</p>
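<p>In SSE2 terms the whole 16-byte probe is three instructions. This sketch (illustrative, not Boost's internals) returns a bitmask with one bit per candidate bucket:</p>
<pre><code>#include &lt;emmintrin.h&gt;  // SSE2 intrinsics

// Compare one reduced-hash byte against 16 metadata bytes at once.
// Bit i of the result is set iff metadata[i] equals reduced_hash.
int match_candidates(const unsigned char* metadata, unsigned char reduced_hash) {
    __m128i group  = _mm_loadu_si128((const __m128i*)metadata);
    __m128i needle = _mm_set1_epi8((char)reduced_hash);
    return _mm_movemask_epi8(_mm_cmpeq_epi8(group, needle));
}</code></pre>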
<h4>How boost::unordered_flat_map Works</h4>
<p><b>Group-based organization:</b> The bucket array is split into groups of 15 buckets. When inserting, the hash value selects a group (not an individual bucket). Elements fill groups from one end to the other, creating clusters of used buckets.</p>
<p><b>Metadata structure:</b> Each group has a 16-byte metadata word:</p>
<ul>
<li><b>15 hi bytes:</b> One per bucket, storing either 0 (empty), 1 (sentinel marker), or a reduced hash value [2-255]</li>
<li><b>1 overflow byte (ofw):</b> Eight bits acting as a "mini-Bloom filter" for probing</li>
</ul>
<p><b>SIMD lookups:</b> When searching a group, SIMD instructions match the lookup's reduced hash against all 15 metadata bytes simultaneously. Only matching buckets get full element comparisons.</p>
<p><b>The overflow byte trick:</b> Here's the clever part that avoids tombstones. When a group fills up during insertion, set a bit in the overflow byte (based on the hash value) before moving to the next group. During lookup, if the corresponding overflow bit is 0, you can stop searching—the element definitely isn't in later groups.</p>
<p>This overflow byte acts like a Bloom filter: bits set to 1 mean "keep looking," bits set to 0 mean "definitely stop here."</p>
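<p>In code, the filter comes down to two one-liners (hypothetical helpers; the bit position shown here is simply the hash modulo 8):</p>
<pre><code>#include &lt;cstddef&gt;

// Insertion, group full: record that an element with this hash
// continued past this group before probing the next one.
void mark_overflow(unsigned char&amp; ofw, std::size_t hash) {
    ofw |= (unsigned char)(1u &lt;&lt; (hash % 8));
}

// Lookup: if this bit is clear, nothing with this hash ever overflowed
// the group, so probing can stop right here.
bool may_continue(unsigned char ofw, std::size_t hash) {
    return (ofw &amp; (1u &lt;&lt; (hash % 8))) != 0;
}</code></pre>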
<p><b>No SIMD? No problem:</b> On systems without SSE2 or Neon, Boost uses "bit interleaving" to pack the metadata into two 64-bit words, enabling reasonably fast operations without SIMD.</p>
<p><b>Preventing performance drift:</b> Open addressing has a problem where repeated insertions and deletions gradually degrade performance. Boost's solution: when you delete an element whose overflow bit is set, the container lowers its maximum load threshold slightly. Eventually this triggers a rehash, restoring optimal performance.</p>
<h4>Boost.Unordered vs Abseil</h4>
<p>Simulation programs comparing <code>boost::unordered_flat_map</code> with <code>absl::flat_hash_map</code> reveal key performance differences.</p>
<p><b>How Abseil works:</b> Abseil's Swiss Tables hash individual buckets (not groups), use 16-bucket SIMD scans, and store 7 bits of hash information per bucket with tombstones for deletion.</p>
<p><b>Successful lookups:</b> Boost needs slightly more hops on average because free buckets cluster at group ends rather than distributing uniformly. However, actual comparison counts are nearly identical (within 1%) because Boost uses 7.99 bits of hash information versus Abseil's 7 bits—each extra bit roughly halves false matches.</p>
<p><b>Unsuccessful lookups:</b> Boost is considerably faster here. Abseil's probe terminates only when finding a non-full group (all-or-nothing). Boost's overflow byte acts like a Bloom filter, providing 8 bits of termination information and making early termination 1.75x more likely. Under high load, Boost performs up to 3.2x fewer comparisons for unsuccessful lookups.</p>
<h4>Real-World Performance Tests</h4>
<p>Boost's aggregate benchmarks combine multiple operations using different key types (strings, integers, UUIDs) on Intel Xeon E5-2683 @ 2.10GHz:</p>
<ul>
<li><b>std::uint32_t:</b> Boost 29% faster (4,974ms vs 6,400ms) with lower memory</li>
<li><b>std::uint64_t:</b> Boost 20% faster (5,447ms vs 6,530ms) with lower memory</li>
<li><b>UUID (16 bytes):</b> Boost 14% faster (9,223ms vs 10,559ms) with lower memory</li>
<li><b>std::string:</b> Abseil slightly faster (13,018ms vs 14,486ms) but uses more memory</li>
</ul>
<h4>Independent Verification</h4>
<p>Jackson Allan's extensive 2024 benchmark suite tested diverse conditions on AMD Ryzen 7 5800H with GCC 13.2.0, confirming <code>boost::unordered_flat_map</code> as "the all-around best performer, especially when hot in the cache." The analysis found very fast insertions, excellent performance for looking up and erasing nonexisting keys, very fast string key lookups, and excellent iteration performance due to key clustering within bucket groups.</p>
<p>Boost's advantage is particularly pronounced in low-key-count benchmarks (0 to 200,000 keys), suggesting it benefits more from cache residency than competing implementations.</p>
<h3>III. Concurrent Containers: Multithreading with boost::concurrent_flat_map</h3>
<p>Boost 1.83 added <code>boost::concurrent_flat_map</code> for programs where multiple threads need to access the same hash table. It uses the same fast open-addressing layout as <code>boost::unordered_flat_map</code> but adds smart locking.</p>
<p><b>Two-level locking strategy:</b></p>
<ul>
<li><b>Container level:</b> A read-write mutex for whole-table operations like rehashing (rarely locked)</li>
<li><b>Group level:</b> Each group has its own spinlock, so different threads can work on different groups simultaneously</li>
</ul>
<p><b>Mostly lock-free lookups:</b> Hash calculation, probing, and SIMD matching all happen without locks. Only the final element comparison needs a group lock.</p>
<p><b>Smart insertions:</b> Uses "transactional optimistic insertion" to prevent duplicate elements. The algorithm saves the group's counter, does the insertion, then checks if another thread interfered. If so, it rolls back and retries. Even in worst-case scenarios, retries happen only parts-per-million times.</p>
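<p>Because handing out iterators would be unsafe under concurrency, access goes through visitation functions instead. A usage sketch, with API names as documented for <code>boost::concurrent_flat_map</code>:</p>
<pre><code>#include &lt;boost/unordered/concurrent_flat_map.hpp&gt;
#include &lt;string&gt;

boost::concurrent_flat_map&lt;std::string, int&gt; counts;

// Safe to call from many threads: insert the key with count 1, or run
// the lambda on the existing element (under its group lock).
void record(const std::string&amp; key) {
    counts.insert_or_visit({key, 1}, [](auto&amp; kv) { ++kv.second; });
}

// Reads also go through visit(); it returns the number of elements visited.
int lookup(const std::string&amp; key) {
    int value = 0;
    counts.visit(key, [&amp;](const auto&amp; kv) { value = kv.second; });
    return value;
}</code></pre>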
<h4>Performance vs Intel TBB</h4>
<p>Benchmarks on AMD Ryzen 5 3600 show <code>boost::concurrent_flat_map</code> significantly outperforms <code>tbb::concurrent_hash_map</code>, particularly when many threads target a small set of keys (high-skew workloads). The fine-grained group locking (potentially thousands of groups) handles contention better than coarse 256-shard locking.</p>
<p><b>The Results:</b></p>
<p>500k updates across low (.01), medium (.5), and high (.99) skew, via GCC 12, x64:</p>
<p class="inset"><code>boost::concurrent_flat_map</code> handles <b>2x ops / sec</b> vs. <code>tbb::concurrent_hash_map</code></p>
<p>5M updates across low (.01), medium (.5), and high (.99) skew, via GCC 12, x64:</p>
<p class="inset"><code>boost::concurrent_flat_map</code> handles <b>2.5x ops / sec</b> vs. <code>tbb::concurrent_hash_map</code></p>
<p>For cache-friendly workloads with 500,000 operations, Boost continues improving performance even beyond the physical core count, suggesting memory latency (not computation) is the bottleneck. Performance characteristics depend heavily on your specific CPU and memory architecture, so test on your target hardware for best results.</p>
<h3>Conclusion: By The Numbers</h3>
<p>Boost.Unordered establishes itself as the performance leader through systematic innovations across all container types. Here's what the numbers show:</p>
<p><b>For drop-in std replacement:</b> <code>boost::unordered_map</code> delivers 28% improvements over <code>std::unordered_map</code> in real-world DDoS detection workloads while maintaining complete API compatibility.</p>
<p><b>For maximum speed:</b> <code>boost::unordered_flat_map</code> outperforms Abseil Swiss tables by 14-29% across diverse workloads, with particularly strong advantages for unsuccessful lookups (up to 3.2x better under high load) and integer key operations.</p>
<p><b>For multithreading:</b> <code>boost::concurrent_flat_map</code> outperforms Intel TBB by 2 to 2.5x while providing excellent performance through fine-grained locking and mostly lock-free operations.</p>
<p>Independent benchmarking consistently identifies <code>boost::unordered_flat_map</code> as "the all-around best performer, especially when hot in the cache." The library provides high-performance hash containers matched to your specific requirements, whether you need standards compliance, maximum throughput, or thread safety.</p>
<p><a href="https://github.com/boostorg/unordered" target="_blank">Unordered on GitHub</a></p>
<p><a href="https://www.boost.org/doc/libs/latest/libs/unordered/index.html" target="_blank">Unordered documentation</a></p>
<p><a href="https://www.boost.org/library/latest/unordered/" target="_blank">Unordered website page</a></p>
<p>Sources:</p>
<p><a href="https://medium.com/@pavel.odintsov/boost-unordered-map-is-a-new-king-of-data-structures-292124d3ee2" target="_blank">https://medium.com/@pavel.odintsov/boost-unordered-map-is-a-new-king-of-data-structures-292124d3ee2</a></p>
<p><a href="https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html" target="_blank">https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html</a></p>
<p><a href="https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html" target="_blank">https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html</a></p>
<p><a href="https://bannalia.blogspot.com/2023/07/inside-boostconcurrentflatmap.html?m=1" target="_blank">https://bannalia.blogspot.com/2023/07/inside-boostconcurrentflatmap.html?m=1</a></p>
<p><a href="https://martin.ankerl.com/2022/08/27/hashmap-bench-01/" target="_blank">https://martin.ankerl.com/2022/08/27/hashmap-bench-01/</a></p>
<p><a href="https://jacksonallan.github.io/c_cpp_hash_tables_benchmark/" target="_blank">https://jacksonallan.github.io/c_cpp_hash_tables_benchmark/</a></p>
<p><a href="https://artificial-mind.net/blog/2021/10/09/unordered-map-badness" target="_blank">https://artificial-mind.net/blog/2021/10/09/unordered-map-badness</a></p>
</body>
</html>

View File

@@ -0,0 +1,154 @@
{% load static %}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="{% static 'css/landing-style.css' %}" />
</head>
<body>
<h1>Drop your email below to get engineering updates. Then scroll down for the Unordered tech overview & links.</h1>
{% if messages %}
{# Expect a success message from submitting the form #}
<div class="email-block">
<div class="messages">
{% for message in messages %}
<p{% if message.tags %} class="{{ message.tags }}"{% endif %}>{{ message }}</p>
{% endfor %}
</div>
</div>
{% else %}
<div class="email-block">
<form method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.email.errors }}
{{ form.email }}
<button class="email-button" type="submit">Get C++ updates</button>
</form>
</div>
<p>Privacy: no spam, one-step unsubscribe. We'll only send high-signal dev content about Unordered and other Boost libraries.</p>
{% endif %}
<hr/>
<h2>TECH OVERVIEW</h2>
<h1 class="flex"><div class="logo"></div>Boost.Unordered: High-Performance Hash Containers for C++</h1>
<h3>Understanding the Container Options</h3>
<p>Boost.Unordered gives you 12 different hash container types to choose from, organized into three main families. Think of these as tools in your performance toolbox—each one is optimized for different situations.</p>
<p><b>I. Closed-addressing containers</b> (like <code>boost::unordered_map</code> and <code>boost::unordered_set</code>) work exactly like <code>std::unordered</code> containers. You can drop them into existing code as faster replacements. They support C++11 and newer standards.</p>
<p><b>II. Open-addressing containers are the speed champions. </b><code>boost::unordered_flat_map</code> and <code>boost::unordered_flat_set</code> store elements directly in the bucket array for maximum performance. If you need pointer stability (addresses that don't change), use <code>boost::unordered_node_map</code> and <code>boost::unordered_node_set</code> instead—they're slightly slower but still very fast.</p>
<p><b>III. Concurrent containers</b> like <code>boost::concurrent_flat_map</code> and <code>boost::concurrent_flat_set</code> are designed for multithreaded programs where multiple threads need to access the same container safely.</p>
<h3>I. Closed-Addressing Containers: How boost::unordered_map Got So Fast</h3>
<h4>The Problem with Standard Implementations</h4>
<p>Back in 2003, when C++ standardized hash tables, the committee chose "closed addressing" (also called separate chaining) because open addressing wasn't mature yet. This decision became baked into the standard through requirements like the bucket API, pointer stability, and user-controllable load factors.</p>
<p>The standard also required that iterator increment be constant time and erase be constant time on average. These requirements forced standard libraries to use complicated workarounds that made their implementations slower.</p>
<p>For example, libstdc++ and libc++ link all nodes together across the entire container. To make this work, buckets point to the node before the first one in the bucket (not the first one itself), and each node stores its hash value. These extra pointers and stored hash values waste memory and slow things down.</p>
<h4>Boost's Solution (Released August 2022)</h4>
<p>Boost.Unordered 1.80 went back to basics. Nodes are only linked within each bucket, not across the whole container. This makes deletion trivial—just remove the node from its bucket's list.</p>
<p>For iteration, Boost introduced <b>bucket groups</b>. Each group has a 32/64-bit mask showing which buckets are occupied, plus pointers linking groups together. To iterate, you use fast bit operations on the masks and jump between groups using the pointers. This takes only 4 bits per bucket and is very fast.</p>
<h4>Fast Modulo Magic</h4>
<p>Hash tables need to map hash values to bucket positions. Traditional approaches use either expensive modulo operations with prime numbers, or power-of-two sizes with bit masking.</p>
<p>Boost uses prime numbers (for better distribution) but uses them in combination with Daniel Lemire's "fastmod" technique—it's as fast as power-of-two bit operations but gives you the better distribution of prime numbers. Even better, eliminating function pointer tables allows the compiler to inline code for extra speed.</p>
<h4>Real-World Speed Test: FastNetMon DDoS Detection</h4>
<p>Pavel Odintsov, who runs one of the fastest DDoS detection products on the market (FastNetMon), tested the performance improvements using actual network traffic from a large ISP. The test used 131,000 unique IP addresses with real access patterns—not synthetic benchmark data.</p>
<p>Testing on an AMD Ryzen 5 3600 with gcc 12.1 showed the Boost 1.80 <code>boost::unordered_map</code> achieved 32.4M ops/sec vs <code>std::unordered_map</code>'s 25.3M ops/sec.</p>
<p>That's a <b>28% speed improvement</b> in real-world DDoS detection workloads. When you're trying to detect network attacks in under a second, this kind of performance gain matters.</p>
<h4>Why It's Faster</h4>
<p>Boost's improved layout uses less memory: only 12N + 0.5B bytes of overhead per element (on 64-bit systems) compared to libstdc++'s 16N + 8B bytes. Less memory means better cache performance. Combine that with fast modulo and one less pointer indirection per lookup, and you get substantial real-world speedups.</p>
<h3>II. Open-Addressing Containers: When to Use boost::unordered_flat_map</h3>
<p>Starting in Boost 1.81 (December 2022), Boost added <code>boost::unordered_flat_map</code> and <code>boost::unordered_flat_set</code>—containers that break some C++ standard requirements in exchange for much better performance. By 2022, open addressing had clearly won the performance race.</p>
<h4>Choose boost::unordered_flat_map when:</h4>
<ul>
<li>Speed is your top priority</li>
<li>Your types support move construction (most do)</li>
<li>You're using good hash functions (or Boost's defaults)</li>
</ul>
<h4>Stick with boost::unordered_map when:</h4>
<ul><li>You need exact <code>std::unordered_map</code> compatibility</li>
<li>You need pointer stability (pointers to elements that never change)</li>
<li>You're using multimap or multiset variants</li>
<li>Your hash functions aren't great</li>
</ul>
<h4>Why Open Addressing Is Fast</h4>
<p><b>Cache-friendly</b> design: Elements live directly in the bucket array, not scattered in separate nodes. Modern CPUs love this because:</p>
<ul>
<li>No pointer indirection—the bucket position is the element position</li>
<li>Contiguous memory layout means better cache utilization</li>
</ul>
<p><b>The collision problem:</b> When two elements hash to the same bucket, open addressing uses a "probing sequence" to find an empty bucket nearby. Boost uses a non-relocating approach (elements stay where they're inserted) to behave more like <code>std::unordered_map</code>.</p>
<p>The main challenge: when you delete an element, you can't just mark its bucket as empty—that would break lookups for elements stored further along the probing sequence. Traditional solutions use "tombstones" (markers that say "something was here"), but those slow down lookups over time.</p>
<h4>SIMD: Checking Multiple Buckets at Once</h4>
<p>SIMD stands for "Single Instruction, Multiple Data"—CPU instructions that process multiple values in parallel. Originally designed for video processing, SIMD was later recognized by hash table implementers as a way to speed up lookups.</p>
<p> <b>The basic idea:</b> Instead of storing just elements, also maintain a metadata array with one byte per bucket. Each metadata byte holds a "reduced hash value"—a shortened version of the element's hash. When looking up an element, SIMD instructions can check 16 metadata bytes simultaneously to find potential matches, then only do the expensive full comparison on actual candidates.</p>
<p> This technique checks 16 buckets in constant time. Google's Abseil and Meta's F14 containers pioneered this approach.</p>
<h4>How boost::unordered_flat_map Works</h4>
<p><b>Group-based organization:</b> The bucket array is split into groups of 15 buckets. When inserting, the hash value selects a group (not an individual bucket). Elements fill groups from one end to the other, creating clusters of used buckets.</p>
<p><b>Metadata structure:</b> Each group has a 16-byte metadata word:</p>
<ul>
<li><b>15 hi bytes:</b> One per bucket, storing either 0 (empty), 1 (sentinel marker), or a reduced hash value [2-255]</li>
<li><b>1 overflow byte (ofw):</b> Eight bits acting as a "mini-Bloom filter" for probing</li>
</ul>
<p><b>SIMD lookups:</b> When searching a group, SIMD instructions match the lookup's reduced hash against all 15 metadata bytes simultaneously. Only matching buckets get full element comparisons.</p>
<p><b>The overflow byte trick:</b> Here's the clever part that avoids tombstones. When a group fills up during insertion, set a bit in the overflow byte (based on the hash value) before moving to the next group. During lookup, if the corresponding overflow bit is 0, you can stop searching—the element definitely isn't in later groups.</p>
<p>This overflow byte acts like a Bloom filter: bits set to 1 mean "keep looking," bits set to 0 mean "definitely stop here."</p>
<p><b>No SIMD? No problem:</b> On systems without SSE2 or Neon, Boost uses "bit interleaving" to pack the metadata into two 64-bit words, enabling reasonably fast operations without SIMD.</p>
<p><b>Preventing performance drift:</b> Open addressing has a problem where repeated insertions and deletions gradually degrade performance. Boost's solution: when you delete an element whose overflow bit is set, the container lowers its maximum load threshold slightly. Eventually this triggers a rehash, restoring optimal performance.</p>
<h4>Boost.Unordered vs Abseil</h4>
<p>Simulation programs comparing <code>boost::unordered_flat_map</code> with <code>absl::flat_hash_map</code> reveal key performance differences.</p>
<p><b>How Abseil works:</b> Abseil's Swiss Tables hash individual buckets (not groups), use 16-bucket SIMD scans, and store 7 bits of hash information per bucket with tombstones for deletion.</p>
<p><b>Successful lookups:</b> Boost needs slightly more hops on average because free buckets cluster at group ends rather than distributing uniformly. However, actual comparison counts are nearly identical (within 1%) because Boost uses 7.99 bits of hash information versus Abseil's 7 bits—each extra bit roughly halves false matches.</p>
<p><b>Unsuccessful lookups:</b> Boost is considerably faster here. Abseil's probe terminates only when finding a non-full group (all-or-nothing). Boost's overflow byte acts like a Bloom filter, providing 8 bits of termination information and making early termination 1.75x more likely. Under high load, Boost performs up to 3.2x fewer comparisons for unsuccessful lookups.</p>
<h4>Real-World Performance Tests</h4>
<p>Boost's aggregate benchmarks combine multiple operations using different key types (strings, integers, UUIDs) on Intel Xeon E5-2683 @ 2.10GHz:</p>
<ul>
<li><b>std::uint32_t:</b> Boost 29% faster (4,974ms vs 6,400ms) with lower memory</li>
<li><b>std::uint64_t:</b> Boost 20% faster (5,447ms vs 6,530ms) with lower memory</li>
<li><b>UUID (16 bytes):</b> Boost 14% faster (9,223ms vs 10,559ms) with lower memory</li>
<li><b>std::string:</b> Abseil slightly faster (13,018ms vs 14,486ms) but uses more memory</li>
</ul>
<h4>Independent Verification</h4>
<p>Jackson Allan's extensive 2024 benchmark suite tested diverse conditions on AMD Ryzen 7 5800H with GCC 13.2.0, confirming <code>boost::unordered_flat_map</code> as "the all-around best performer, especially when hot in the cache." The analysis found very fast insertions, excellent performance for looking up and erasing nonexisting keys, very fast string key lookups, and excellent iteration performance due to key clustering within bucket groups.</p>
<p>Boost's advantage is particularly pronounced in low-key-count benchmarks (0 to 200,000 keys), suggesting it benefits more from cache residency than competing implementations.</p>
<h3>III. Concurrent Containers: Multithreading with boost::concurrent_flat_map</h3>
<p>Boost 1.83 added <code>boost::concurrent_flat_map</code> for programs where multiple threads need to access the same hash table. It uses the same fast open-addressing layout as <code>boost::unordered_flat_map</code> but adds smart locking.</p>
<p><b>Two-level locking strategy:</b></p>
<ul>
<li><b>Container level:</b> A read-write mutex for whole-table operations like rehashing (rarely locked)</li>
<li><b>Group level:</b> Each group has its own spinlock, so different threads can work on different groups simultaneously</li>
</ul>
<p><b>Mostly lock-free lookups:</b> Hash calculation, probing, and SIMD matching all happen without locks. Only the final element comparison needs a group lock.</p>
<p><b>Smart insertions:</b> Uses "transactional optimistic insertion" to prevent duplicate elements. The algorithm saves the group's counter, does the insertion, then checks if another thread interfered. If so, it rolls back and retries. Even in worst-case scenarios, retries happen only parts-per-million times.</p>
<h4>Performance vs Intel TBB</h4>
<p>Benchmarks on AMD Ryzen 5 3600 show <code>boost::concurrent_flat_map</code> significantly outperforms <code>tbb::concurrent_hash_map</code>, particularly when many threads target a small set of keys (high-skew workloads). The fine-grained group locking (potentially thousands of groups) handles contention better than coarse 256-shard locking.</p>
<p><b>The Results:</b></p>
<p>500k updates across low (.01), medium (.5), and high (.99) skew, via GCC 12, x64:</p>
<p class="inset"><code>boost::concurrent_flat_map</code> handles <b>2x ops / sec</b> vs. <code>tbb::concurrent_hash_map</code></p>
<p>5M updates across low (.01), medium (.5), and high (.99) skew, via GCC 12, x64:</p>
<p class="inset"><code>boost::concurrent_flat_map</code> handles <b>2.5x ops / sec</b> vs. <code>tbb::concurrent_hash_map</code></p>
<p>For cache-friendly workloads with 500,000 operations, Boost continues improving performance even beyond the physical core count, suggesting memory latency (not computation) is the bottleneck. Performance characteristics depend heavily on your specific CPU and memory architecture, so test on your target hardware for best results.</p>
<h3>Conclusion: By The Numbers</h3>
<p>Boost.Unordered establishes itself as the performance leader through systematic innovations across all container types. Here's what the numbers show:</p>
<p><b>For drop-in std replacement:</b> <code>boost::unordered_map</code> delivers 28% improvements over <code>std::unordered_map</code> in real-world DDoS detection workloads while maintaining complete API compatibility.</p>
<p><b>For maximum speed:</b> <code>boost::unordered_flat_map</code> outperforms Abseil Swiss tables by 14-29% across diverse workloads, with particularly strong advantages for unsuccessful lookups (up to 3.2x better under high load) and integer key operations.</p>
<p><b>For multithreading:</b> <code>boost::concurrent_flat_map</code> outperforms Intel TBB by 2 to 2.5x while providing excellent performance through fine-grained locking and mostly lock-free operations.</p>
<p>Independent benchmarking consistently identifies <code>boost::unordered_flat_map</code> as "the all-around best performer, especially when hot in the cache." The library provides high-performance hash containers matched to your specific requirements, whether you need standards compliance, maximum throughput, or thread safety.</p>
<p><a href="https://github.com/boostorg/unordered" target="_blank">Unordered on GitHub</a></p>
<p><a href="https://www.boost.org/doc/libs/latest/libs/unordered/index.html" target="_blank">Unordered documentation</a></p>
<p><a href="https://www.boost.org/library/latest/unordered/" target="_blank">Unordered website page</a></p>
<p>Sources:</p>
<p><a href="https://medium.com/@pavel.odintsov/boost-unordered-map-is-a-new-king-of-data-structures-292124d3ee2" target="_blank">https://medium.com/@pavel.odintsov/boost-unordered-map-is-a-new-king-of-data-structures-292124d3ee2</a></p>
<p><a href="https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html" target="_blank">https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html</a></p>
<p><a href="https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html" target="_blank">https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html</a></p>
<p><a href="https://bannalia.blogspot.com/2023/07/inside-boostconcurrentflatmap.html?m=1" target="_blank">https://bannalia.blogspot.com/2023/07/inside-boostconcurrentflatmap.html?m=1</a></p>
<p><a href="https://martin.ankerl.com/2022/08/27/hashmap-bench-01/" target="_blank">https://martin.ankerl.com/2022/08/27/hashmap-bench-01/</a></p>
<p><a href="https://jacksonallan.github.io/c_cpp_hash_tables_benchmark/" target="_blank">https://jacksonallan.github.io/c_cpp_hash_tables_benchmark/</a></p>
<p><a href="https://artificial-mind.net/blog/2021/10/09/unordered-map-badness" target="_blank">https://artificial-mind.net/blog/2021/10/09/unordered-map-badness</a></p>
</body>
</html>