Latest links updates (#1976, #2000) (#2060)

This commit is contained in:
daveoconnor
2026-02-20 12:41:54 -08:00
committed by GitHub
parent 53a1a94ba1
commit 925bb7d942
24 changed files with 945 additions and 21 deletions

View File

@@ -56,6 +56,7 @@ There are two options for development setups, nix and native. The native setup d
1. [Non-Dev Server Allauth Setup](docs/non-dev-server-allauth-setup.md)
1. [Admin Features](docs/admin.md)
1. [Mailing List Setup](docs/mailing_list.md)
1. [Connecting to Servers](https://github.com/cppalliance/website-v2-operations/blob/master/gcp/README.md)
After going through the "Development System setup" steps above to create the Docker image, install dependencies, and start the services in `docker-compose.yml`, run:

View File

@@ -2,18 +2,31 @@ from django.contrib import admin
from django.urls import path
from django.shortcuts import redirect, render
from django.contrib import messages
from django.utils import timezone
from .models import RenderedContent, SiteSettings
from .tasks import delete_all_rendered_content
@admin.register(RenderedContent)
class RenderedContentAdmin(admin.ModelAdmin):
list_display = ("cache_key", "content_type", "modified")
list_display = (
"cache_key",
"content_type",
"modified",
"latest_path_matched_indicator",
"latest_path_match_class",
)
search_fields = ("cache_key",)
readonly_fields = ("latest_path_match_class",)
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path(
"start-content-refresh/",
self.admin_site.admin_view(self.start_content_refresh_view),
name="core_renderedcontent_start_content_refresh",
),
path(
"delete-all/",
self.admin_site.admin_view(self.delete_all_view),
@@ -22,6 +35,27 @@ class RenderedContentAdmin(admin.ModelAdmin):
]
return custom_urls + urls
def start_content_refresh_view(self, request):
    """Confirm, then record, the start of a rendered-content refresh.

    Any non-POST request renders the confirmation template. A POST stores
    the current time on the ``SiteSettings`` singleton, flashes a success
    message and redirects to ``".."``.
    """
    if request.method != "POST":
        return render(
            request,
            "admin/core/renderedcontent/start_content_refresh_confirmation.html",
            {
                **self.admin_site.each_context(request),
                "title": "Start Content Refresh",
            },
        )

    site_settings = SiteSettings.load()
    site_settings.rendered_content_replacement_start = timezone.now()
    site_settings.save()
    started_at = site_settings.rendered_content_replacement_start
    messages.success(request, f"Content refresh start time set to {started_at}")
    return redirect("..")
def delete_all_view(self, request):
if request.method == "POST":
delete_all_rendered_content.delay()
@@ -42,13 +76,15 @@ class RenderedContentAdmin(admin.ModelAdmin):
def changelist_view(self, request, extra_context=None):
    """Render the changelist with the custom-action flags switched on."""
    if not extra_context:
        extra_context = {}
    # Flags are added in place, so a non-empty dict passed by the caller is
    # mutated rather than copied.
    extra_context.update(has_start_content_refresh=True, has_delete_all=True)
    return super().changelist_view(request, extra_context=extra_context)
@admin.register(SiteSettings)
class SiteSettingsAdmin(admin.ModelAdmin):
list_display = ("id", "wordcloud_ignore")
list_display = ("id", "wordcloud_ignore", "rendered_content_replacement_start")
readonly_fields = ("rendered_content_replacement_start",)
def has_add_permission(self, request):
return super().has_add_permission(request) and SiteSettings.objects.count() == 0

View File

@@ -0,0 +1,36 @@
# Generated by Django 6.0.2 on 2026-02-18 20:54
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds three "latest path" bookkeeping fields to the RenderedContent model.

    # Applied after core migration 0003.
    dependencies = [
        ("core", "0003_sitesettings_and_more"),
    ]
    operations = [
        # Stored latest docs path; empty string by default.
        migrations.AddField(
            model_name="renderedcontent",
            name="latest_docs_path",
            field=models.CharField(blank=True, default=""),
        ),
        # Name of the matcher class that produced the result (max 128 chars).
        migrations.AddField(
            model_name="renderedcontent",
            name="latest_path_match_class",
            field=models.CharField(blank=True, default="", max_length=128),
        ),
        # Integer flag describing how the latest path is determined; the
        # choices mirror LatestPathMatchIndicator in core/models.py.
        migrations.AddField(
            model_name="renderedcontent",
            name="latest_path_matched_indicator",
            field=models.IntegerField(
                choices=[
                    (0, "Undetermined"),
                    (1, "Direct match exists"),
                    (2, "Determined by matcher"),
                ],
                default=0,
                help_text="Indicates how the latest path should be determined.",
            ),
        ),
    ]

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.8 on 2026-01-27 18:47
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the nullable, non-editable timestamp
    # SiteSettings.rendered_content_replacement_start.

    # Applied after core migration 0004.
    dependencies = [
        ("core", "0004_renderedcontent_latest_docs_path_and_more"),
    ]
    operations = [
        migrations.AddField(
            model_name="sitesettings",
            name="rendered_content_replacement_start",
            field=models.DateTimeField(
                blank=True,
                editable=False,
                help_text="Set via RenderedContent admin action.",
                null=True,
            ),
        ),
    ]

View File

@@ -1,10 +1,20 @@
import re
from django.db import models
from django.utils.translation import gettext_lazy as _
from django_extensions.db.models import TimeStampedModel
from libraries.path_matcher.utils import determine_latest_url
from versions.models import Version
from .managers import RenderedContentManager
class LatestPathMatchIndicator(models.IntegerChoices):
    # Records how the "latest" docs path of a RenderedContent row is obtained;
    # RenderedContent.latest_path branches on these values.

    # No result stored yet: the path is computed through the matcher chain.
    UNDETERMINED = 0, _("Undetermined")
    # The path is derived from the row's cache_key by regex substitution.
    DIRECT_MATCH = 1, _("Direct match exists")
    # The path is read from the stored latest_docs_path field.
    CUSTOM_MATCH = 2, _("Determined by matcher")
class RenderedContent(TimeStampedModel):
"""Stores a copy of rendered content. Generally, this content is retrieved
from the S3 buckets and, if necessary, converted to HTML.
@@ -41,6 +51,16 @@ class RenderedContent(TimeStampedModel):
blank=True,
)
latest_path_matched_indicator = models.IntegerField(
choices=LatestPathMatchIndicator,
default=LatestPathMatchIndicator.UNDETERMINED,
null=False,
blank=False,
help_text=_("Indicates how the latest path should be determined."),
)
latest_docs_path = models.CharField(blank=True, default="")
latest_path_match_class = models.CharField(max_length=128, blank=True, default="")
objects = RenderedContentManager()
class Meta:
@@ -50,6 +70,23 @@ class RenderedContent(TimeStampedModel):
def __str__(self):
return self.cache_key
@property
def latest_path(self) -> str | None:
    """Return the "latest" docs path that corresponds to this content.

    The result depends on ``latest_path_matched_indicator``:

    * ``DIRECT_MATCH``: the ``static_content_<version>/`` prefix of
      ``cache_key`` is rewritten to ``doc/libs/latest/``. If the pattern
      does not match, ``re.sub`` returns ``cache_key`` unchanged.
    * ``CUSTOM_MATCH``: the stored ``latest_docs_path`` is returned.
    * ``UNDETERMINED``: the path is computed on demand with
      ``determine_latest_url`` against the most recent version.

    Returns ``None`` for any other indicator value.
    """
    indicator = self.latest_path_matched_indicator
    if indicator == LatestPathMatchIndicator.DIRECT_MATCH:
        return re.sub(
            r"static_content_[\d_]+/(?P<content_path>[^/]\S+)",
            # Raw string: "\g" is not a valid escape in a normal string
            # literal and triggers a SyntaxWarning on recent Pythons.
            r"doc/libs/latest/\g<content_path>",
            self.cache_key,
        )
    if indicator == LatestPathMatchIndicator.CUSTOM_MATCH:
        return self.latest_docs_path
    if indicator == LatestPathMatchIndicator.UNDETERMINED:
        return determine_latest_url(
            self.cache_key.replace("static_content_", ""),
            Version.objects.most_recent(),
        )
    return None
def save(self, *args, **kwargs):
if isinstance(self.content_original, bytes):
self.content_original = self.content_original.decode("utf-8")
@@ -66,6 +103,12 @@ class SiteSettings(models.Model):
default="",
help_text="A comma-separated list of words to ignore in the release report wordcloud.", # noqa E501
)
rendered_content_replacement_start = models.DateTimeField(
null=True,
blank=True,
editable=False,
help_text="Set via RenderedContent admin action.",
)
class Meta:
constraints = [

View File

@@ -4,11 +4,14 @@ from celery import shared_task
from dateutil.parser import parse
from django.core.cache import caches
from django.utils import timezone
from core.asciidoc import convert_adoc_to_html
from libraries.path_matcher.utils import get_path_match_from_chain
from versions.models import Version
from .boostrenderer import get_content_from_s3
from .constants import RENDERED_CONTENT_BATCH_DELETE_SIZE
from .models import RenderedContent
from .models import RenderedContent, LatestPathMatchIndicator
logger = structlog.get_logger()
@@ -66,9 +69,27 @@ def refresh_content_from_s3(s3_key, cache_key):
@shared_task
def save_rendered_content(cache_key, content_type, content_html, last_updated_at=None):
"""Saves a RenderedContent object to database."""
match_result = get_path_match_from_chain(
cache_key.replace("static_content_", ""), Version.objects.most_recent()
)
indicator = (
LatestPathMatchIndicator.DIRECT_MATCH
if match_result.is_direct_equivalent
else LatestPathMatchIndicator.CUSTOM_MATCH
)
# we don't set the latest_docs_path if it's a direct match, for db size reduction
defaults = {
"content_type": content_type,
"content_html": content_html,
"latest_path_matched_indicator": indicator,
"latest_docs_path": (
match_result.latest_path if not match_result.is_direct_equivalent else None
),
"latest_path_match_class": match_result.matcher,
"modified": timezone.now(),
}
if last_updated_at:

View File

@@ -398,9 +398,7 @@ def test_static_content_blocks_direct_doc_paths(request_factory):
@pytest.mark.django_db
@override_settings(
CACHES=TEST_CACHES,
)
@override_settings(CACHES=TEST_CACHES)
def test_static_content_allows_non_direct_doc_paths(request_factory):
"""Test that non-direct doc paths are allowed and processed normally."""

View File

@@ -71,7 +71,7 @@ from .htmlhelper import (
add_canonical_link,
)
from .markdown import process_md
from .models import RenderedContent
from .models import RenderedContent, SiteSettings
from .tasks import (
clear_rendered_content_cache_by_cache_key,
clear_rendered_content_cache_by_content_type,
@@ -393,6 +393,7 @@ class BaseStaticContentTemplateView(TemplateView):
return {
"content": content_obj.content_html.encode("utf-8"),
"content_type": content_obj.content_type,
"updated": content_obj.modified,
}
except RenderedContent.DoesNotExist:
return None
@@ -607,6 +608,13 @@ class DocLibsTemplateView(VersionAlertMixin, BaseStaticContentTemplateView):
result = self.get_from_database(cache_key)
if not result and (result := self.get_from_s3(content_path)):
self.save_to_database(cache_key, result)
if result:
refresh_start = SiteSettings.load().rendered_content_replacement_start
last_updated = result.get("updated", timezone.now())
if refresh_start and last_updated < refresh_start:
refresh_content_from_s3.delay(
f"/archives/boost_{content_path}", cache_key
)
elif content_data := self.get_from_s3(content_path):
# structure is to allow for redirect/return to be handled in a unified way
result = {

View File

@@ -54,6 +54,7 @@
awscli
gdk
just
kubectl
opentofu
# frontend
nodejs_22 # matches Dockerfile, due for upgrade?

View File

@@ -8,6 +8,7 @@ from django.db.models.functions import Lower
from django.shortcuts import get_object_or_404
from django.urls import reverse
from core.models import RenderedContent
from libraries.constants import (
LATEST_RELEASE_URL_PATH_STR,
MASTER_RELEASE_URL_PATH_STR,
@@ -20,6 +21,7 @@ from libraries.models import (
Library,
LibraryVersion,
)
from libraries.path_matcher.utils import determine_latest_url
from versions.models import Version
logger = structlog.get_logger()
@@ -37,15 +39,33 @@ class VersionAlertMixin:
current_version_kwargs = self.kwargs.copy()
if url_name == "docs-libs-page":
alert_visible = not current_version_kwargs.get("content_path").startswith(
LATEST_RELEASE_URL_PATH_STR
)
allowed_types = getattr(self, "html_content_types", [])
if allowed_types and context.get("content_type") not in allowed_types:
return context
content_path = current_version_kwargs.get("content_path")
alert_visible = not content_path.startswith(LATEST_RELEASE_URL_PATH_STR)
if alert_visible:
content = RenderedContent.objects.filter(
cache_key=f"static_content_{content_path}"
).first()
version_alert_url = (
content.latest_path
if content
else determine_latest_url(
content_path,
Version.objects.most_recent(),
)
)
context["version_alert_url"] = f"/{version_alert_url}"
# TODO: this hack is here because the BoostVersionMixin only handles the
# libraries format (boost-1-90-0-beta-1) for betas, while this path uses
# 1_90_beta1 so we need to retrieve and set the selected_version
# specifically for this use, db slug = "boost-1-90-0-beta1"
# path_slug = 1_90_beta1
path_slug = current_version_kwargs.get("content_path").split("/")[0]
path_slug = content_path.split("/")[0]
if path_slug == LATEST_RELEASE_URL_PATH_STR:
context["selected_version"] = Version.objects.most_recent()
elif path_slug in ("master", "develop"):
@@ -59,7 +79,7 @@ class VersionAlertMixin:
"content_path": re.sub(
r"([_0-9a-zA-Z]+|master|develop)/(\S+)",
rf"{LATEST_RELEASE_URL_PATH_STR}/\2",
current_version_kwargs.get("content_path"),
content_path,
)
}
)
@@ -68,7 +88,10 @@ class VersionAlertMixin:
alert_visible = (
self.kwargs.get("version_slug") != LATEST_RELEASE_URL_PATH_STR
)
context["version_alert_url"] = reverse(url_name, kwargs=current_version_kwargs)
context["version_alert_url"] = reverse(
url_name, kwargs=current_version_kwargs
)
context["version_alert"] = alert_visible
return context

View File

@@ -0,0 +1,25 @@
from .base_path_matcher import BasePathMatcher, PathSegments, PathMatchResult
from .matchers import (
DirectMatcher,
LibsPathToLatestDirectMatcher,
LibsPathToLatestFallbackMatcher,
LibsToAntoraPathDirectMatcher,
DocHtmlBoostPathToFallbackMatcher,
DocHtmlPathToDirectMatcher,
DocHtmlBoostHtmlFallbackPathMatcher,
ToLibsLatestRootFallbackMatcher,
)
# ``__all__`` must hold the public *names* as strings. Listing the objects
# themselves makes ``from libraries.path_matcher import *`` raise TypeError.
__all__ = [
    "BasePathMatcher",
    "PathSegments",
    "PathMatchResult",
    "DirectMatcher",
    "LibsPathToLatestDirectMatcher",
    "LibsPathToLatestFallbackMatcher",
    "LibsToAntoraPathDirectMatcher",
    "DocHtmlBoostPathToFallbackMatcher",
    "DocHtmlPathToDirectMatcher",
    "DocHtmlBoostHtmlFallbackPathMatcher",
    "ToLibsLatestRootFallbackMatcher",
]

View File

@@ -0,0 +1,156 @@
import re
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from botocore.client import BaseClient
from botocore.exceptions import ClientError
from django.conf import settings
from versions.models import Version
import structlog
logger = structlog.get_logger(__name__)
@dataclass
class PathSegments:
    """The two named regex groups a matcher extracts from a source path."""

    # Value of the matcher regex's "library_name" group.
    library_name: str
    # Value of the matcher regex's "content_path" group.
    content_path: str
@dataclass
class PathMatchResult:
    """Outcome produced by the matcher that accepted a path."""

    # Copied from the matcher's ``has_equivalent`` flag.
    is_direct_equivalent: bool
    # URL path generated by the matcher's ``generate_latest_url``.
    latest_path: str
    # Class name of the matcher that produced this result.
    matcher: str
class BasePathMatcher(metaclass=ABCMeta):
    """
    One link in a chain of matchers that map a versioned docs path onto a path
    in the latest release.

    Subclass names should follow "(FromDescription)To(ToDescription)(Direct|Fallback)Matcher":
    * ...Direct - returns a directly matching file in the latest library docs
    * ...Fallback - returns an index.htm(l) file in the latest library docs, or
      otherwise doesn't mind there being no exact match in the db/s3

    Operation:
    1. the path is tested against the subclass's path_re regex.
    2. with no regex match, the next matcher in the chain is tried.
    3. with a regex match, the DB is checked for the generated key, falling
       back to S3 in case the content just hasn't been cached yet.
    4. with no db/s3 hit, a matcher flagged is_index_fallback=True still
       returns its result.
    5. otherwise the next matcher in the chain is tried; the last matcher
       raises ValueError when nothing matched.

    class properties:
    has_equivalent: default False, set to True if this class provides a direct
        equivalent path and no path translation is needed
    is_index_fallback: default False, set to True if this matcher accepts that
        the path may not actually exist.
    path_re: a compiled regex as documented on the property
    """

    has_equivalent: bool = False
    is_index_fallback: bool = False

    @property
    @abstractmethod
    def path_re(self) -> re.Pattern[str]:
        """
        A Pattern with named groups 'library_name' and 'content_path', e.g.
        re.compile(rf"{BOOST_VERSION_REGEX}/libs/(?P<library_name>[\w]+)/(?P<content_path>\S+)")
        Both groups must capture something, even if a generate_... method
        ends up ignoring one of them.
        """
        raise NotImplementedError

    def __init__(self, latest_version: Version, s3_client: BaseClient):
        self.latest_version: Version = latest_version
        self.latest_slug: str = latest_version.stripped_boost_url_slug
        self.s3_client: BaseClient = s3_client
        # Successor in the chain; stays None for the final matcher.
        self.next: BasePathMatcher | None = None

    def set_next(self, next_matcher: "BasePathMatcher"):
        """Register the matcher to consult when this one does not match."""
        self.next = next_matcher

    @abstractmethod
    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """
        Build the s3/cache_key style path whose existence is checked, e.g.
            static_content_1_84_0/libs/algorithm/doc/html/index.html
            static_content_1_84_0/doc/html/accumulators.html
        """
        raise NotImplementedError

    @abstractmethod
    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Build the latest url the user should actually be presented with."""
        raise NotImplementedError

    def determine_match(self, path: str) -> PathMatchResult:
        """Return this matcher's result for ``path`` or delegate down the chain.

        Raises ValueError when no matcher in the remaining chain accepts it.
        """
        segments = self.get_group_items(path)
        if segments is not None and (
            self.confirm_path_exists(path, segments) or self.is_index_fallback
        ):
            logger.debug(f"regex match on {self.get_class_name()}")
            return self.get_result(segments)

        logger.debug(f"no regex match determined on {self.get_class_name()}")
        if not self.next:
            msg = f"No redirect path match for {path=}"
            logger.warning(msg)
            raise ValueError(msg)
        return self.next.determine_match(path)

    def get_group_items(self, path: str) -> PathSegments | None:
        """
        Return the PathSegments captured by path_re, or None when the regex
        does not match or either named group is empty/missing.
        """
        src_match = self.path_re.match(path)
        if not src_match:
            return None
        captured = src_match.groupdict()
        library_name = captured.get("library_name")
        content_path = captured.get("content_path")
        if library_name and content_path:
            return PathSegments(library_name, content_path)
        return None

    def confirm_path_exists(self, path: str, segments: PathSegments) -> bool:
        """Check the generated key against the DB first, then against S3."""
        s3_path = self.generate_latest_s3_path(path, segments)
        logger.debug(f"{s3_path=}")
        found_in_db = self.confirm_db_path_exists(s3_path)
        return found_in_db or self.confirm_s3_path_exists(s3_path)

    def confirm_s3_path_exists(self, path: str) -> bool:
        """Issue a head_object request for the archive key derived from ``path``."""
        # s3 stored, e.g. archives/boost_1_90_0/doc/html/accumulators.html
        archive_key = path.replace("static_content_", "archives/boost_")
        logger.debug(f"Checking S3 for {path=} ~ {archive_key=} ")
        try:
            self.s3_client.head_object(
                Bucket=settings.STATIC_CONTENT_BUCKET_NAME, Key=archive_key
            )
        except ClientError:
            logger.debug(f"S3 key does not exist: {path}")
            return False
        logger.debug(f"S3 key exists: {path}")
        return True

    @staticmethod
    def confirm_db_path_exists(path: str) -> bool:
        """Report whether a RenderedContent row has ``path`` as its cache_key."""
        # Imported here rather than at module level.
        from core.models import RenderedContent

        logger.debug(f"{path=}")
        is_path = RenderedContent.objects.filter(cache_key=path).exists()
        if not is_path:
            return False
        logger.debug(f"RenderedContent match {is_path=}")
        return True

    def get_class_name(self):
        """Return the concrete matcher's class name."""
        return type(self).__name__

    def get_result(self, path_data: PathSegments) -> PathMatchResult:
        """Package this matcher's flag, generated url and class name."""
        return PathMatchResult(
            is_direct_equivalent=self.has_equivalent,
            latest_path=self.generate_latest_url(path_data),
            matcher=self.get_class_name(),
        )

    def handle(self, test_path: str) -> PathMatchResult:
        """Chain entry point; equivalent to determine_match(test_path)."""
        return self.determine_match(test_path)

View File

@@ -0,0 +1,222 @@
import os
import re
from core.constants import BOOST_VERSION_REGEX
from libraries.constants import LATEST_RELEASE_URL_PATH_STR
from libraries.path_matcher import BasePathMatcher, PathSegments
class DirectMatcher(BasePathMatcher):
    """Match any versioned path whose remainder exists as-is in the latest release.

    The whole remainder after the version segment is captured as both
    ``content_path`` and ``library_name``; only ``content_path`` is used.
    """

    # pseudo-example 1_84_0/*/CXX11.html
    # pseudo-expected s3 dest = static_content_1_79_0/*/CXX11.html e.g. 'static_content_1_90_0/doc/html/accumulators.html'
    # pseudo-expected final path = doc/libs/latest/*/CXX11.html
    has_equivalent = True
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/(?P<content_path>(?P<library_name>\S+))"
    )

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key for the same content path in the latest version."""
        return "/".join([f"static_content_{self.latest_slug}", segments.content_path])

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path for the same content path."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            ["doc", "libs", LATEST_RELEASE_URL_PATH_STR, path_data.content_path]
        )
class LibsPathToLatestDirectMatcher(BasePathMatcher):
    """Match ``<version>/libs/<library>/<path>`` to the same file in the latest release."""

    # example 1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html
    # expected s3 dest = static_content_1_79_0/libs/algorithm/doc/html/algorithm/CXX11.html
    # expected final path = doc/libs/latest/libs/algorithm/doc/html/algorithm/CXX11.html
    has_equivalent = False
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/libs/(?P<library_name>[\w]+)/(?P<content_path>\S+)"
    )

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key of the same library file in the latest version."""
        return "/".join(
            [
                f"static_content_{self.latest_slug}",
                "libs",
                segments.library_name,
                segments.content_path,
            ]
        )

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path of the same library file."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            [
                "doc",
                "libs",
                LATEST_RELEASE_URL_PATH_STR,
                "libs",
                path_data.library_name,
                path_data.content_path,
            ]
        )
class LibsPathToLatestFallbackMatcher(BasePathMatcher):
    """Send ``<version>/libs/<library>/<path>`` to that library's ``index.html``.

    Flagged as an index fallback, so the result is returned even when the
    index page is found in neither the DB nor S3.
    """

    # example 1_78_0/libs/algorithm/doc/html/header/boost/algorithm/string_regex_hpp.html
    # expected s3 dest = static_content_1_79_0/libs/algorithm/index.html
    # expected final path = doc/libs/latest/libs/algorithm/index.html
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/libs/(?P<library_name>[\w]+)/(?P<content_path>\S+)"
    )
    is_index_fallback = True

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key of the library's index page in the latest version."""
        return "/".join(
            [
                f"static_content_{self.latest_slug}",
                "libs",
                segments.library_name,
                "index.html",
            ]
        )

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path of the library's index page."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            [
                "doc",
                "libs",
                LATEST_RELEASE_URL_PATH_STR,
                "libs",
                path_data.library_name,
                "index.html",
            ]
        )
class LibsToAntoraPathDirectMatcher(BasePathMatcher):
    """Map ``<version>/libs/url/<path>`` onto the Antora docs tree of the latest release."""

    # example 1_85_0/libs/url/doc/html/url/urls/segments.html
    # expected s3 dest = static_content_1_79_0/doc/antora/url/urls/segments.html
    # expected final dest = doc/libs/latest/doc/antora/url/urls/segments.html
    # Only the boost urls library redirects to antora for now so the regex in use
    # is tightly limited to that. The commented path_re will work when this is
    # needed to be more generic, all other things being equal.
    # path_re = re.compile(rf"{BOOST_VERSION_REGEX}/libs/(?P<library_name>[\w]+)/(?P<content_path>\S+)")
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/libs/(?P<library_name>url)/(?P<content_path>\S+)"
    )

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the Antora cache key, dropping ``doc/html/`` from the content path."""
        # The library name is not appended separately: in the example above
        # it is already part of what remains of content_path.
        return "/".join(
            [
                f"static_content_{self.latest_slug}",
                "doc",
                "antora",
                segments.content_path.replace("doc/html/", ""),
            ]
        )

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs Antora URL path, dropping ``doc/html/``."""
        # The library name is not appended separately: in the example above
        # it is already part of what remains of content_path.
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            [
                "doc",
                "libs",
                LATEST_RELEASE_URL_PATH_STR,
                "doc",
                "antora",
                path_data.content_path.replace("doc/html/", ""),
            ]
        )
class DocHtmlBoostPathToFallbackMatcher(BasePathMatcher):
    """Send ``<version>/doc/html/boost_<library>/<path>`` to that library's ``index.html``.

    Flagged as an index fallback, so the result is returned even when the
    index page is found in neither the DB nor S3.
    """

    # example 1_64_0/doc/html/boost_process/acknowledgements.html
    # expected s3 dest = static_content_1_79_0/libs/process/index.html
    # expected final path = doc/libs/latest/libs/process/index.html
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/doc/html/boost_(?P<library_name>[\w]+)/(?P<content_path>\S+)"
    )
    is_index_fallback = True

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key of the library's index page in the latest version."""
        return "/".join(
            [
                f"static_content_{self.latest_slug}",
                "libs",
                segments.library_name,
                "index.html",
            ]
        )

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path of the library's index page."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            [
                "doc",
                "libs",
                LATEST_RELEASE_URL_PATH_STR,
                "libs",
                path_data.library_name,
                "index.html",
            ]
        )
class DocHtmlPathToDirectMatcher(BasePathMatcher):
    """Match ``<version>/doc/html/<name>.html`` (not ``boost_*``) to the same file in latest."""

    # example = 1_35_0/doc/html/interprocess.html
    # expected s3 dest = static_content_1_79_0/doc/html/interprocess.html
    # expected final path = doc/libs/latest/doc/html/interprocess.html
    # The dot before "html" is escaped so it only matches a literal "."
    # instead of any character.
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/(?P<content_path>doc/html/(?!boost_)(?P<library_name>[\w]+\.html))"
    )

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key for the same content path in the latest version."""
        return "/".join([f"static_content_{self.latest_slug}", segments.content_path])

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path for the same content path."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            ["doc", "libs", LATEST_RELEASE_URL_PATH_STR, path_data.content_path]
        )
class DocHtmlBoostHtmlFallbackPathMatcher(BasePathMatcher):
    """Send ``<version>/doc/html/boost_<library>.html`` to ``libs/<library>/doc/html/index.html``.

    Flagged as an index fallback, so the result is returned even when the
    index page is found in neither the DB nor S3.
    """

    # example 1_34_0/doc/html/boost_math.html
    # expected s3 dest = static_content_1_79_0/libs/math/doc/html/index.html
    # expected final path = doc/libs/latest/libs/math/doc/html/index.html
    # The dot before "html" is escaped so it only matches a literal "."
    # instead of any character.
    path_re = re.compile(
        rf"{BOOST_VERSION_REGEX}/(?P<content_path>doc/html)/boost_(?P<library_name>[\w]+)\.html"
    )
    is_index_fallback = True

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the cache key of the library's doc/html index in the latest version."""
        return "/".join(
            [
                f"static_content_{self.latest_slug}",
                "libs",
                segments.library_name,
                segments.content_path,
                "index.html",
            ]
        )

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest-docs URL path of the library's doc/html index."""
        # URLs are always "/"-separated; os.path.sep would be "\\" on Windows.
        return "/".join(
            [
                "doc",
                "libs",
                LATEST_RELEASE_URL_PATH_STR,
                "libs",
                path_data.library_name,
                path_data.content_path,
                "index.html",
            ]
        )
class ToLibsLatestRootFallbackMatcher(BasePathMatcher):
    """Catch-all matcher that points at the latest libraries listing.

    The regex accepts any path that starts with a non-whitespace character,
    so the captured group values carry no real meaning here.
    """

    is_index_fallback = True
    # any other path not matched will arrive here, values inaccurate, set as needed
    path_re = re.compile(r"(?P<content_path>(?P<library_name>\S+))")

    def generate_latest_s3_path(self, path: str, segments: PathSegments) -> str:
        """Return the ``libs`` root key of the latest version."""
        return f"static_content_{self.latest_slug}/libs"

    def generate_latest_url(self, path_data: PathSegments) -> str:
        """Return the latest libraries listing URL path."""
        # the empty last element yields the trailing slash, saving a redirect
        return "/".join(["libraries", LATEST_RELEASE_URL_PATH_STR, ""])

View File

@@ -0,0 +1,42 @@
from libraries.path_matcher.base_path_matcher import PathMatchResult
from libraries.path_matcher.matchers import (
DirectMatcher,
LibsPathToLatestDirectMatcher,
LibsPathToLatestFallbackMatcher,
LibsToAntoraPathDirectMatcher,
DocHtmlBoostPathToFallbackMatcher,
DocHtmlPathToDirectMatcher,
DocHtmlBoostHtmlFallbackPathMatcher,
ToLibsLatestRootFallbackMatcher,
)
from libraries.utils import get_s3_client
from versions.models import Version
def get_path_match_from_chain(url: str, latest_version: Version) -> PathMatchResult:
    """Run ``url`` through the ordered matcher chain and return the first result.

    Every matcher is built with the same S3 client and ``latest_version``
    and linked to its successor. The chain's own ValueError propagates when
    no matcher accepts ``url``.
    """
    s3_client = get_s3_client()
    # matcher chain in order
    chain_order = (
        DirectMatcher,
        LibsPathToLatestDirectMatcher,
        LibsToAntoraPathDirectMatcher,
        LibsPathToLatestFallbackMatcher,
        DocHtmlBoostPathToFallbackMatcher,
        DocHtmlPathToDirectMatcher,
        DocHtmlBoostHtmlFallbackPathMatcher,
        ToLibsLatestRootFallbackMatcher,
    )
    head = tail = None
    for matcher_class in chain_order:
        matcher = matcher_class(latest_version, s3_client)
        if tail is None:
            head = matcher
        else:
            tail.set_next(matcher)
        tail = matcher
    return head.handle(test_path=url)
def determine_latest_url(url: str, latest_version: Version) -> str:
    """Return only the ``latest_path`` of the matcher chain's result for ``url``."""
    return get_path_match_from_chain(url, latest_version).latest_path

View File

View File

@@ -0,0 +1,233 @@
import pytest
from unittest.mock import MagicMock, patch
from libraries.path_matcher import (
BasePathMatcher,
DirectMatcher,
LibsPathToLatestDirectMatcher,
LibsPathToLatestFallbackMatcher,
LibsToAntoraPathDirectMatcher,
DocHtmlBoostPathToFallbackMatcher,
DocHtmlPathToDirectMatcher,
DocHtmlBoostHtmlFallbackPathMatcher,
ToLibsLatestRootFallbackMatcher,
)
from libraries.path_matcher.utils import get_path_match_from_chain, determine_latest_url
# One tuple per matcher class, consumed by
# test_libs_path_to_latest_exact_db_path_exists. Field order:
#   matcher_class, db_path_result, s3_path_result, test_path,
#   expected_s3_key, is_direct_equivalent, expected_final_path
# NOTE(review): the 1_79_0 in the expected keys presumably comes from the
# `version` fixture - confirm.
test_params = [
    (
        DirectMatcher,
        True,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html",  # src path
        "static_content_1_79_0/libs/algorithm/doc/html/algorithm/CXX11.html",  # expected s3 key
        True,  # is direct equivalent
        "doc/libs/latest/libs/algorithm/doc/html/algorithm/CXX11.html",  # expected final path
    ),
    (
        LibsPathToLatestDirectMatcher,
        True,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html",  # src path
        "static_content_1_79_0/libs/algorithm/doc/html/algorithm/CXX11.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/libs/algorithm/doc/html/algorithm/CXX11.html",  # expected final path
    ),
    (
        LibsPathToLatestFallbackMatcher,
        False,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_78_0/libs/algorithm/doc/html/header/boost/algorithm/string_regex_hpp.html",  # src path
        "static_content_1_79_0/libs/algorithm/index.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/libs/algorithm/index.html",  # expected final path
    ),
    (
        LibsToAntoraPathDirectMatcher,
        True,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_85_0/libs/url/doc/html/url/urls/segments.html",  # src path
        "static_content_1_79_0/doc/antora/url/urls/segments.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/doc/antora/url/urls/segments.html",  # expected final path
    ),
    (
        DocHtmlBoostPathToFallbackMatcher,
        True,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_64_0/doc/html/boost_process/acknowledgements.html",  # src path
        "static_content_1_79_0/libs/process/index.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/libs/process/index.html",  # expected final path
    ),
    (
        DocHtmlPathToDirectMatcher,
        False,  # db_path_result (stubbed confirm_db_path_exists)
        True,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_35_0/doc/html/interprocess.html",  # src path
        "static_content_1_79_0/doc/html/interprocess.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/doc/html/interprocess.html",  # expected final path
    ),
    (
        DocHtmlBoostHtmlFallbackPathMatcher,
        False,  # db_path_result (stubbed confirm_db_path_exists)
        True,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_34_0/doc/html/boost_math.html",  # src path
        "static_content_1_79_0/libs/math/doc/html/index.html",  # expected s3 key
        False,  # is not a direct equivalent
        "doc/libs/latest/libs/math/doc/html/index.html",  # expected final path
    ),
    (
        ToLibsLatestRootFallbackMatcher,
        False,  # db_path_result (stubbed confirm_db_path_exists)
        False,  # s3_path_result (stubbed confirm_s3_path_exists)
        "1_33_1/doc/html/BOOST_VARIANT_LIMIT_TYPES.html",  # src path
        "static_content_1_79_0/libs",  # expected s3 key
        False,  # is not a direct equivalent
        "libraries/latest/",  # expected final path
    ),
]
@pytest.mark.parametrize(
    "matcher_class,db_path_result,s3_path_result,test_path,expected_s3_key,is_direct_equivalent,expected_final_path",
    test_params,
)
def test_libs_path_to_latest_exact_db_path_exists(
    matcher_class,
    db_path_result,
    s3_path_result,
    test_path,
    expected_s3_key,
    is_direct_equivalent,
    expected_final_path,
    monkeypatch,
    version,
):
    """Each matcher, used on its own, checks the expected key and yields the expected result."""
    # Stub both existence probes on the base class so no DB/S3 access happens.
    stubbed_probes = {
        "confirm_db_path_exists": db_path_result,
        "confirm_s3_path_exists": s3_path_result,
    }
    for probe_name, outcome in stubbed_probes.items():
        monkeypatch.setattr(
            BasePathMatcher, probe_name, lambda _self, _path, outcome=outcome: outcome
        )

    matcher = matcher_class(version, MagicMock())
    # Spy on the (stubbed) db probe to capture the generated key.
    with patch.object(
        matcher, "confirm_db_path_exists", wraps=matcher.confirm_db_path_exists
    ) as db_probe_spy:
        result = matcher.determine_match(test_path)

    db_probe_spy.assert_called_once_with(expected_s3_key)
    assert result.is_direct_equivalent == is_direct_equivalent
    assert result.latest_path == expected_final_path
# One tuple per scenario, consumed by test_handoff. Field order:
#   test_url, expected_match (final path), db_path_exists (value returned by
#   the db probe stubbed on matching_class), matching_class (matcher expected
#   to produce the result)
chain_data = [
    (
        "1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html",
        "doc/libs/latest/libs/algorithm/doc/html/algorithm/CXX11.html",
        True,
        DirectMatcher,
    ),
    (
        "1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html",
        "doc/libs/latest/libs/algorithm/doc/html/algorithm/CXX11.html",
        True,
        LibsPathToLatestDirectMatcher,
    ),
    (
        "1_84_0/libs/algorithm/doc/html/algorithm/nope.html",
        "doc/libs/latest/libs/algorithm/index.html",
        False,
        LibsPathToLatestFallbackMatcher,
    ),
    (
        "1_85_0/libs/url/doc/html/url/urls/segments.html",
        "doc/libs/latest/doc/antora/url/urls/segments.html",
        True,
        LibsToAntoraPathDirectMatcher,
    ),
    (
        "1_35_0/doc/html/interprocess.html",
        "doc/libs/latest/doc/html/interprocess.html",
        True,
        DocHtmlPathToDirectMatcher,
    ),
    (
        "1_64_0/doc/html/boost_process/acknowledgements.html",
        "doc/libs/latest/libs/process/index.html",
        True,
        DocHtmlBoostPathToFallbackMatcher,
    ),
    (
        "1_34_0/doc/html/boost_math.html",
        "doc/libs/latest/libs/math/doc/html/index.html",
        False,
        DocHtmlBoostHtmlFallbackPathMatcher,
    ),
    (
        # Falls through every specific matcher to the catch-all.
        "1_XX_Y/does/not/exist",
        "libraries/latest/",
        False,
        ToLibsLatestRootFallbackMatcher,
    ),
]
@pytest.mark.parametrize(
    "test_url,expected_match,db_path_exists,matching_class", chain_data
)
def test_handoff(
    test_url, expected_match, db_path_exists, matching_class, monkeypatch, version
):
    """The full chain hands a URL on until the expected matcher produces the result."""

    def deny(_self, _path):
        return False

    # default deny
    for probe_name in ("confirm_db_path_exists", "confirm_s3_path_exists"):
        monkeypatch.setattr(BasePathMatcher, probe_name, deny)

    # The db probe is overridden on matching_class only because, e.g. for the
    # antora case, the match has to land on LibsToAntoraPathDirectMatcher
    # specifically and not on an earlier matcher whose regex also matches but
    # whose key would not be in db/s3. For the same reason the expected name
    # is taken from matching_class(...).get_class_name() instead of a string.
    monkeypatch.setattr(
        matching_class, "confirm_db_path_exists", lambda _self, _path: db_path_exists
    )

    match_result = get_path_match_from_chain(test_url, latest_version=version)
    expected_matcher_name = matching_class(version, MagicMock()).get_class_name()

    assert match_result.latest_path == expected_match
    assert match_result.matcher == expected_matcher_name
def test_determine_latest_url(monkeypatch, version):
    """determine_latest_url returns only the latest path of the chain's result."""
    # Default deny: without these stubs the matchers ahead of the target in
    # the chain would query the real database and the real S3 client.
    monkeypatch.setattr(BasePathMatcher, "confirm_db_path_exists", lambda x, y: False)
    monkeypatch.setattr(BasePathMatcher, "confirm_s3_path_exists", lambda x, y: False)
    monkeypatch.setattr(
        DocHtmlBoostHtmlFallbackPathMatcher, "confirm_db_path_exists", lambda x, y: True
    )
    test_url = "1_34_0/doc/html/boost_math.html"
    expected_latest_url = "doc/libs/latest/libs/math/doc/html/index.html"
    assert determine_latest_url(test_url, version) == expected_latest_url
def test_s3_archive_key_prefix(version):
    """Test that the S3 archive key correctly contains the 'archives/boost_' prefix"""
    s3_stub = MagicMock()
    requested_path = (
        "static_content_1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html"
    )
    archive_key = "archives/boost_1_84_0/libs/algorithm/doc/html/algorithm/CXX11.html"

    # Run the S3 existence check through a matcher wired to the stub client.
    DirectMatcher(version, s3_stub).confirm_s3_path_exists(requested_path)

    # Exactly one head_object call is expected; inspect the Key it was given.
    head_object = s3_stub.head_object
    head_object.assert_called_once()
    sent_key = head_object.call_args.kwargs["Key"]

    assert sent_key == archive_key
    assert sent_key.startswith("archives/")
    assert "archives/boost_" in sent_key

View File

@@ -3,12 +3,16 @@ import string
import re
from itertools import islice
import boto3
import structlog
import tempfile
from datetime import datetime, timezone
from botocore.client import BaseClient
from dateutil.relativedelta import relativedelta
from dateutil.parser import ParserError, parse
from django.conf import settings
from django.utils.text import slugify
from libraries.constants import (
@@ -377,3 +381,12 @@ def generate_release_report_filename(version_slug: str, published_format: bool =
filename_data.append(datetime.now(timezone.utc).isoformat())
filename = f"{'-'.join(filename_data)}.pdf"
return filename
def get_s3_client() -> BaseClient:
    """Return a boto3 S3 client configured from the STATIC_CONTENT_* settings.

    The access key id, secret access key and region are read from
    ``django.conf.settings`` each time the function is called.
    """
    client_options = {
        "aws_access_key_id": settings.STATIC_CONTENT_AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": settings.STATIC_CONTENT_AWS_SECRET_ACCESS_KEY,
        "region_name": settings.STATIC_CONTENT_REGION,
    }
    return boto3.client("s3", **client_options)

View File

@@ -1,3 +1,3 @@
-c requirements.txt
django-debug-toolbar
pydevd-pycharm==253.29346.142 # pinned to appropriate version for current pycharm
pydevd-pycharm==253.29346.308 # pinned to appropriate version for current pycharm

View File

@@ -10,7 +10,7 @@ django==6.0.2
# django-debug-toolbar
django-debug-toolbar==6.2.0
# via -r ./requirements-dev.in
pydevd-pycharm==253.29346.142
pydevd-pycharm==253.29346.308
# via -r ./requirements-dev.in
sqlparse==0.5.5
# via

View File

@@ -64,6 +64,7 @@ pre-commit
pytest
pytest-cov
pytest-django
pytest-mock
responses
# Packaging

View File

@@ -433,10 +433,13 @@ pytest==9.0.2
# -r ./requirements.in
# pytest-cov
# pytest-django
# pytest-mock
pytest-cov==7.0.0
# via -r ./requirements.in
pytest-django==4.11.1
# via -r ./requirements.in
pytest-mock==3.15.1
# via -r ./requirements.in
python-dateutil==2.9.0.post0
# via
# -r ./requirements.in

View File

@@ -184,13 +184,7 @@ download_latest_db_dump() {
docker compose exec db bash -c "pg_restore -U $DB_USER -d $DB_NAME -v --no-owner --no-privileges /tmp/$DUMP_FILENAME"
# apply any migrations newer than our dumped database
docker compose exec web bash -c "./manage.py migrate"
# update the database to delete all rows from socialaccount_social app, which need to be configured differently locally
echo "Deleting all rows from socialaccount_socialapp table and setting fake passwords..."
docker compose exec web bash -c "./manage.py shell -c 'from allauth.socialaccount.models import SocialApp; SocialApp.objects.all().delete()'"
just manage "set_fake_passwords --password=test"
echo 'from django.contrib.auth import get_user_model; u=get_user_model().objects.get(email="superadmin@boost.org"); u.set_password("foobarone"); u.save()' | docker compose exec -T web python manage.py shell
echo "Database restored successfully from $DUMP_FILENAME"
return 0
}
@@ -199,6 +193,11 @@ if [ "${skip_web_option:-}" != "yes" ]; then
echo "Failed to download and restore latest database dump";
exit 1;
}
docker compose exec web bash -c "DJANGO_SUPERUSER_USERNAME=superadmin DJANGO_SUPERUSER_EMAIL=superadmin@boost.org DJANGO_SUPERUSER_PASSWORD=foobarone ./manage.py createsuperuser --noinput" || true
# update the database to delete all rows from socialaccount_social app, which need to be configured differently locally
echo "Deleting all rows from socialaccount_socialapp table and setting fake passwords..."
docker compose exec web bash -c "./manage.py shell -c 'from allauth.socialaccount.models import SocialApp; SocialApp.objects.all().delete()'"
just manage "set_fake_passwords --password=test"
fi
if [ "${skip_lists_option:-}" != "yes" ]; then

View File

@@ -3,6 +3,13 @@
{% block object-tools-items %}
{{ block.super }}
{% if has_start_content_refresh %}
<li>
<a href="{% url 'admin:core_renderedcontent_start_content_refresh' %}" class="addlink">
{% trans "Start Content Refresh" %}
</a>
</li>
{% endif %}
{% if has_delete_all %}
<li>
<a href="{% url 'admin:core_renderedcontent_delete_all' %}" class="deletelink">

View File

@@ -0,0 +1,33 @@
{% extends "admin/base_site.html" %}
{% load i18n static %}
{% block extrahead %}
{{ block.super }}
<script src="{% static 'admin/js/cancel.js' %}" async></script>
{% endblock %}
{% block bodyclass %}{{ block.super }} app-core model-renderedcontent delete-confirmation{% endblock %}
{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">{% translate 'Home' %}</a>
&rsaquo; <a href="{% url 'admin:core_renderedcontent_changelist' %}">{% translate 'Rendered Contents' %}</a>
&rsaquo; {% translate 'Start Content Refresh' %}
</div>
{% endblock %}
{% block content %}
<p>Are you sure you want to mark the content refresh start time?</p>
<p>
This will set the rendered_content_replacement_start timestamp in Site Settings to the current time. Page loads
after that time will trigger a content refresh. Note: the first view of a page after that time will not show the
refreshed content.
</p>
<form method="post">{% csrf_token %}
<div>
<input type="hidden" name="post" value="yes">
<input type="submit" value="Yes, I'm sure">
<a href="#" class="button cancel-link">No, take me back</a>
</div>
</form>
{% endblock %}