Add libraries redirect (#1770)

2026-01-19 04:42:17 +00:00 · 2025-05-12 12:21:07 -04:00
parent d0dd59add6
commit ff723c4a43
8 changed files with 281 additions and 27 deletions
--- a/config/settings.py
+++ b/config/settings.py
@@ -388,20 +388,11 @@ if LOCAL_DEVELOPMENT:
        ]


-# Allow Allauth to use HTTPS when deployed but HTTP for local dev
-SECURE_PROXY_SSL_HEADER_NAME = env("SECURE_PROXY_SSL_HEADER_NAME", default=None)
-SECURE_PROXY_SSL_HEADER_VALUE = env("SECURE_PROXY_SSL_HEADER_VALUE", default=None)
-SECURE_SSL_REDIRECT = env("SECURE_SSL_REDIRECT", default=False)
-
 if not LOCAL_DEVELOPMENT:
    ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
-
-if all(
-    [SECURE_PROXY_SSL_HEADER_NAME, SECURE_PROXY_SSL_HEADER_VALUE, SECURE_SSL_REDIRECT]
-):
    SECURE_PROXY_SSL_HEADER = (
-        SECURE_PROXY_SSL_HEADER_NAME,
-        SECURE_PROXY_SSL_HEADER_VALUE,
+        "HTTP_X_FORWARDED_PROTO",
+        ACCOUNT_DEFAULT_HTTP_PROTOCOL,
    )

 # Admin banner configuration
--- a/config/urls.py
+++ b/config/urls.py
@@ -32,6 +32,7 @@ from core.views import (
    StaticContentTemplateView,
    UserGuideTemplateView,
    BoostDevelopmentView,
+    ModernizedDocsView,
 )
 from libraries.api import LibrarySearchView
 from libraries.views import (
@@ -345,6 +346,11 @@ urlpatterns = (
        ),
        # Internal functions
        path("internal/clear-cache/", ClearCacheView.as_view(), name="clear-cache"),
+        path(
+            "internal/modernized-docs/<path:content_path>",
+            ModernizedDocsView.as_view(),
+            name="modernized_docs",
+        ),
    ]
    + [
        # Redirects for old boost.org urls.
--- a/core/htmlhelper.py
+++ b/core/htmlhelper.py
@@ -1,7 +1,8 @@
 import re

-from bs4 import BeautifulSoup, Comment
+from bs4 import BeautifulSoup, Comment, Tag
 from django.template.loader import render_to_string
+from django.templatetags.static import static

 from core.boostrenderer import get_body_from_html
 from core.constants import SourceDocType
@@ -325,6 +326,71 @@ def remove_library_boostlook(soup):
    return soup


+def modernize_preprocessor_docs(soup: BeautifulSoup) -> tuple[BeautifulSoup, bool]:
+    """Special case handling for Boost.Preprocessor docs.
+
+    Returns a two-tuple:
+        0. BeautifulSoup object
+        1. Boolean indicating whether framesets were present (and modified).
+    """
+    # Only transform if <frameset> is present
+    if not soup.find("frameset"):
+        return soup, False
+
+    # Create a new empty soup document
+    new_soup = BeautifulSoup("", "html.parser")
+    html = new_soup.new_tag("html", lang="en")
+    head = new_soup.new_tag("head")
+    body = new_soup.new_tag("body")
+    html.append(head)
+    html.append(body)
+    new_soup.append(html)
+
+    page_title = soup.title.string if soup.title else "Boost"
+    head.append(new_soup.new_tag("meta", charset="utf-8"))
+    title_tag = new_soup.new_tag("title")
+    title_tag.string = page_title
+    head.append(title_tag)
+    css_link = BeautifulSoup(
+        f'<link rel="stylesheet" href={static("css/preprocessing_fixes.css")} type="text/css">'
+    )
+    head.append(css_link)
+
+    # Add HTMX
+    htmx_script = new_soup.new_tag("script", src="https://unpkg.com/htmx.org@1.9.2")
+    head.append(htmx_script)
+
+    def _add_htmx_loading_div(_soup: BeautifulSoup, html_id: str, filename: str) -> Tag:
+        div = _soup.new_tag(
+            "div",
+            id=html_id,
+            **{
+                "hx-get": filename,
+                "hx-trigger": "load",
+                "hx-select": "body > *",
+                "hx-swap": "innerHTML",
+                "hx-boost": "true",
+            },
+        )
+        div.string = "Loading..."
+        return div
+
+    header = _add_htmx_loading_div(new_soup, "header", "top.html")
+    body.append(header)
+
+    # Wrapper for sidebar + main
+    wrapper = new_soup.new_tag("div", attrs={"class": "content-wrapper"})
+    sidebar = _add_htmx_loading_div(new_soup, "sidebar", "contents.html")
+    wrapper.append(sidebar)
+
+    main = _add_htmx_loading_div(new_soup, "main", "title.html")
+    wrapper.append(main)
+
+    body.append(wrapper)
+
+    return new_soup, True
+
+
 def format_nested_lists(soup):
    """Flattens nested lists"""
    try:
--- a/core/views.py
+++ b/core/views.py
@@ -1,5 +1,6 @@
 import os
 import re
+from urllib.parse import urljoin

 import structlog
 from bs4 import BeautifulSoup
@@ -38,6 +39,7 @@ from .htmlhelper import (
    modernize_legacy_page,
    convert_name_to_id,
    remove_library_boostlook,
+    modernize_preprocessor_docs,
 )
 from .markdown import process_md
 from .models import RenderedContent
@@ -428,23 +430,35 @@ class StaticContentTemplateView(BaseStaticContentTemplateView):
        return content


+# possible library versions are: boost_1_53_0_beta1, 1_82_0, 1_55_0b1
+BOOST_LIB_PATH_RE = re.compile(r"^(boost_){0,1}([0-9_]*[0-9]+[^/]*)/(.*)")
+
+
+def normalize_boost_doc_path(content_path: str) -> str:
+    if content_path.startswith(LATEST_RELEASE_URL_PATH_STR):
+        version = Version.objects.most_recent()
+        content_path = content_path.replace(
+            f"{LATEST_RELEASE_URL_PATH_STR}/", f"{version.stripped_boost_url_slug}/"
+        )
+    # Special case for Boost.Process
+    if content_path == "1_88_0/doc/html/process.html":
+        content_path = "1_88_0/libs/process/doc/html/index.html"
+
+    # Match versioned library paths
+    matches = BOOST_LIB_PATH_RE.match(content_path)
+    if matches:
+        groups = matches.groups()
+        if groups and not groups[0]:
+            content_path = f"boost_{content_path}"
+
+    return f"/archives/{content_path}"
+
+
 class DocLibsTemplateView(BaseStaticContentTemplateView):
-    # possible library versions are: boost_1_53_0_beta1, 1_82_0, 1_55_0b1
-    boost_lib_path_re = re.compile(r"^(boost_){0,1}([0-9_]*[0-9]+[^/]*)/(.*)")
    # is_iframe_view = False

    def get_from_s3(self, content_path):
-        # Fix special case
-        if content_path == "1_88_0/doc/html/process.html":
-            content_path = "1_88_0/libs/process/doc/html/index.html"
-        # perform URL matching/mapping, perhaps extract the version from content_path
-        matches = self.boost_lib_path_re.match(content_path)
-        if matches:
-            groups = matches.groups()
-            if groups and not groups[0]:
-                content_path = f"boost_{content_path}"
-
-        legacy_url = f"/archives/{content_path}"
+        legacy_url = normalize_boost_doc_path(content_path)
        return super().get_from_s3(legacy_url)

    def process_content(self, content):
@@ -489,7 +503,22 @@ class DocLibsTemplateView(BaseStaticContentTemplateView):
            soup.find("head").append(
                soup.new_tag("script", src=f"{STATIC_URL}js/theme_handling.js")
            )
+            if "libs/preprocessor" in self.request.path:
+                # Temporarily only run this on the preprocessor docs
+                soup, should_use_modernized_iframe = modernize_preprocessor_docs(soup)
+            else:
+                should_use_modernized_iframe = False
            context["content"] = soup.prettify()
+            if should_use_modernized_iframe:
+                modernized_url = reverse(
+                    "modernized_docs",
+                    kwargs={"content_path": self.kwargs["content_path"]},
+                )
+                return render_to_string(
+                    "docsiframe.html",
+                    {"iframe_url": modernized_url},
+                    request=self.request,
+                )
        else:
            # Potentially pass version if needed for HTML modification.
            # We disable plausible to prevent redundant 'about:srcdoc' tracking,
@@ -556,6 +585,111 @@ class UserGuideTemplateView(BaseStaticContentTemplateView):
        return render_to_string("docsiframe.html", context, request=self.request)


+class ModernizedDocsView(View):
+    """Special case view for handling sub-pages of the Boost.Preprocessor docs."""
+
+    def get(self, request, content_path):
+        soup, response = self._load_and_transform_html(content_path, request)
+        if response:
+            return response  # Early return for non-HTML content
+
+        self._inject_base_tag(soup, request)
+        self._rewrite_links(soup, content_path)
+        self._inject_script(soup)
+
+        html = str(soup)
+        return HttpResponse(html, content_type="text/html")
+
+    def _load_and_transform_html(self, content_path, request):
+        legacy_url = normalize_boost_doc_path(content_path)
+        try:
+            result = get_content_from_s3(key=legacy_url)
+        except ContentNotFoundException:
+            raise Http404("Not found")
+
+        content = result.get("content")
+        content_type = result.get("content_type", "")
+
+        if not content:
+            return None, HttpResponse(
+                content or "", content_type=content_type or "text/plain"
+            )
+
+        html = content.decode(chardet.detect(content)["encoding"])
+
+        if content_type.startswith("text/x-c"):
+            soup = self._process_cpp_code(html)
+            return None, HttpResponse(soup, content_type="text/plain")
+
+        soup = BeautifulSoup(html, "html.parser")
+        soup = convert_name_to_id(soup)
+        soup, _ = modernize_preprocessor_docs(soup)
+        return soup, None
+
+    def _process_cpp_code(self, html):
+        lines = html.strip().splitlines()
+        code_block = "\n".join(lines)
+        soup = BeautifulSoup("", "html.parser")
+        html = soup.new_tag("html")
+        head = soup.new_tag("head")
+        body = soup.new_tag("body")
+        code = soup.new_tag("code", **{"class": "language-cpp"})
+        code.string = code_block
+        pre = soup.new_tag("pre")
+        pre.append(code)
+        body.append(pre)
+        html.append(head)
+        html.append(body)
+        soup.append(html)
+        return soup
+
+    def _inject_base_tag(self, soup, request):
+        if soup.head and not soup.head.find("base"):
+            base_path = request.path.rsplit("/", 1)[0] + "/"
+            base_href = urljoin(request.build_absolute_uri("/"), base_path.lstrip("/"))
+            if not settings.LOCAL_DEVELOPMENT:
+                # Slightly hacky, but it's tricky to get this right inside the iframe
+                base_href = base_href.replace("http://", "https://")
+            base_tag = soup.new_tag("base", href=base_href)
+            soup.head.insert(0, base_tag)
+
+    def _inject_script(self, soup):
+        script_tag = soup.new_tag(
+            "script", src=f"{settings.STATIC_URL}js/theme_handling.js"
+        )
+        if soup.head:
+            soup.head.append(script_tag)
+
+    def _rewrite_links(self, soup, content_path):
+        """Turn anchor tags meant to use framesets into htmx-driven links"""
+
+        def _set_htmx_attrs(tag, _target):
+            tag["hx-target"] = _target
+            tag["hx-swap"] = "innerHTML show:none"
+
+        base_content_path = content_path.rsplit("/", 1)[0] + "/"
+        for a in soup.find_all("a"):
+            target = a.get("target")
+            href = a.get("href", "")
+
+            if target in ("_top", "_parent"):
+                new_path = urljoin(base_content_path, href)
+                a["href"] = reverse("docs-libs-page", kwargs={"content_path": new_path})
+                a["target"] = "_parent"
+            elif target == "index":
+                _set_htmx_attrs(a, "#sidebar")
+            elif target == "desc":
+                _set_htmx_attrs(a, "#main")
+            elif not target:
+                if content_path.endswith("contents.html"):
+                    _set_htmx_attrs(a, "#sidebar")
+                else:
+                    _set_htmx_attrs(a, "#main")
+
+            if target and a["target"] != "_parent":
+                del a["target"]
+
+
 class ImageView(View):
    def get(self, request, *args, **kwargs):
        # TODO: Add caching logic
--- a/kube/boost/templates/configmap-nginx.yaml
+++ b/kube/boost/templates/configmap-nginx.yaml
@@ -129,6 +129,7 @@ data:
      location = /development/separate_compilation.html { return 301 /doc/contributor-guide/design-guide/separate-compilation.html; }
      location = /development/library_metadata.html { return 301 /doc/contributor-guide/requirements/library-metadata.html; }
      location = /doc/ { return 301 /libraries/; }
+      location = /libs/ { return 301 /libraries/; }
      location = /doc/libs/ { return 301 /libraries/; }
      location = /build/ { return 301 /tools/build/; }
      location = /more/lib_guide.htm { return 301 /doc/contributor-guide/index.html; }
--- a/static/css/preprocessing_fixes.css
+++ b/static/css/preprocessing_fixes.css
@@ -0,0 +1,46 @@
+html, body {
+  margin: 0;
+  max-height: 100%;
+  width: 100%;
+}
+body {
+  display: grid;
+  grid-template-rows: auto 1fr;
+  grid-template-columns: 1fr;
+}
+.content-wrapper {
+  display: grid;
+  grid-template-columns: 250px 1fr;
+  height: 100%;
+}
+#sidebar {
+  width: 250px;
+  overflow-y: auto;
+  border-right: 1px solid #ccc;
+}
+#sidebar > h4 > a {
+  color: black;
+  text-decoration: none;
+}
+#main {
+  overflow-y: auto;
+  padding: 1em;
+}
+#header {
+  background: #f0f0f0;
+  padding: 0.5em;
+}
+
+/* TODO: quick basic code styling here; integrate boostlook instead? */
+pre > code {
+  display: block;
+  background-color: #f5f5f5;
+  color: #232a56;
+  padding: 1rem;
+  border-radius: 8px;
+  font-family: monospace;
+  font-size: 0.875rem;
+  overflow-x: auto;
+  white-space: pre-wrap;
+  line-height: 1.5;
+}
--- a/templates/docsiframe.html
+++ b/templates/docsiframe.html
@@ -2,8 +2,13 @@

 {% block content %}
    <iframe
-        srcdoc="{{ content }}"
-        onload="iframeCustomizations(this)"
+        {% if iframe_url %}
+          src="{{ iframe_url }}"
+          style="display: block; width: 100%; min-height: 600px; border: none;"
+        {% else %}
+          srcdoc="{{ content }}"
+          onload="iframeCustomizations(this)"
+        {% endif %}
        id="docsiframe"
    ></iframe>
    <script>
--- a/yarn.lock
+++ b/yarn.lock
@@ -369,6 +369,11 @@ fraction.js@^4.3.7:
  resolved "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz"
  integrity sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==

+fsevents@~2.3.2:
+  version "2.3.3"
+  resolved "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz"
+  integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==
+
 function-bind@^1.1.2:
  version "1.1.2"
  resolved "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz"