From ff723c4a436d67c9d4ef5dcc0d00ed8c9ede10fd Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Mon, 12 May 2025 12:21:07 -0400 Subject: [PATCH] Add libraries redirect (#1770) --- config/settings.py | 13 +- config/urls.py | 6 + core/htmlhelper.py | 68 ++++++++- core/views.py | 160 ++++++++++++++++++++-- kube/boost/templates/configmap-nginx.yaml | 1 + static/css/preprocessing_fixes.css | 46 +++++++ templates/docsiframe.html | 9 +- yarn.lock | 5 + 8 files changed, 281 insertions(+), 27 deletions(-) create mode 100644 static/css/preprocessing_fixes.css diff --git a/config/settings.py b/config/settings.py index 82ec1b06..992ee2a5 100755 --- a/config/settings.py +++ b/config/settings.py @@ -388,20 +388,11 @@ if LOCAL_DEVELOPMENT: ] -# Allow Allauth to use HTTPS when deployed but HTTP for local dev -SECURE_PROXY_SSL_HEADER_NAME = env("SECURE_PROXY_SSL_HEADER_NAME", default=None) -SECURE_PROXY_SSL_HEADER_VALUE = env("SECURE_PROXY_SSL_HEADER_VALUE", default=None) -SECURE_SSL_REDIRECT = env("SECURE_SSL_REDIRECT", default=False) - if not LOCAL_DEVELOPMENT: ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https" - -if all( - [SECURE_PROXY_SSL_HEADER_NAME, SECURE_PROXY_SSL_HEADER_VALUE, SECURE_SSL_REDIRECT] -): SECURE_PROXY_SSL_HEADER = ( - SECURE_PROXY_SSL_HEADER_NAME, - SECURE_PROXY_SSL_HEADER_VALUE, + "HTTP_X_FORWARDED_PROTO", + ACCOUNT_DEFAULT_HTTP_PROTOCOL, ) # Admin banner configuration diff --git a/config/urls.py b/config/urls.py index 9d428a3c..657d1384 100755 --- a/config/urls.py +++ b/config/urls.py @@ -32,6 +32,7 @@ from core.views import ( StaticContentTemplateView, UserGuideTemplateView, BoostDevelopmentView, + ModernizedDocsView, ) from libraries.api import LibrarySearchView from libraries.views import ( @@ -345,6 +346,11 @@ urlpatterns = ( ), # Internal functions path("internal/clear-cache/", ClearCacheView.as_view(), name="clear-cache"), + path( + "internal/modernized-docs/", + ModernizedDocsView.as_view(), + name="modernized_docs", + ), ] + [ # Redirects for old boost.org 
urls. diff --git a/core/htmlhelper.py b/core/htmlhelper.py index 0eebf231..e65aa94d 100644 --- a/core/htmlhelper.py +++ b/core/htmlhelper.py @@ -1,7 +1,8 @@ import re -from bs4 import BeautifulSoup, Comment +from bs4 import BeautifulSoup, Comment, Tag from django.template.loader import render_to_string +from django.templatetags.static import static from core.boostrenderer import get_body_from_html from core.constants import SourceDocType @@ -325,6 +326,71 @@ def remove_library_boostlook(soup): return soup +def modernize_preprocessor_docs(soup: BeautifulSoup) -> tuple[BeautifulSoup, bool]: + """Special case handling for Boost.Preprocessor docs. + + Returns a two-tuple: + 0. BeautifulSoup object + 1. Boolean indicating whether framesets were present (and modified). + """ + # Only transform if <frameset> is present + if not soup.find("frameset"): + return soup, False + + # Create a new empty soup document + new_soup = BeautifulSoup("", "html.parser") + html = new_soup.new_tag("html", lang="en") + head = new_soup.new_tag("head") + body = new_soup.new_tag("body") + html.append(head) + html.append(body) + new_soup.append(html) + + page_title = soup.title.string if soup.title else "Boost" + head.append(new_soup.new_tag("meta", charset="utf-8")) + title_tag = new_soup.new_tag("title") + title_tag.string = page_title + head.append(title_tag) + css_link = BeautifulSoup( + f'<link rel="stylesheet" href="{static("css/preprocessing_fixes.css")}">' + ) + head.append(css_link) + + # Add HTMX + htmx_script = new_soup.new_tag("script", src="https://unpkg.com/htmx.org@1.9.2") + head.append(htmx_script) + + def _add_htmx_loading_div(_soup: BeautifulSoup, html_id: str, filename: str) -> Tag: + div = _soup.new_tag( + "div", + id=html_id, + **{ + "hx-get": filename, + "hx-trigger": "load", + "hx-select": "body > *", + "hx-swap": "innerHTML", + "hx-boost": "true", + }, + ) + div.string = "Loading..." 
+ return div + + header = _add_htmx_loading_div(new_soup, "header", "top.html") + body.append(header) + + # Wrapper for sidebar + main + wrapper = new_soup.new_tag("div", attrs={"class": "content-wrapper"}) + sidebar = _add_htmx_loading_div(new_soup, "sidebar", "contents.html") + wrapper.append(sidebar) + + main = _add_htmx_loading_div(new_soup, "main", "title.html") + wrapper.append(main) + + body.append(wrapper) + + return new_soup, True + + def format_nested_lists(soup): """Flattens nested lists""" try: diff --git a/core/views.py b/core/views.py index d9b3b694..d8355da1 100644 --- a/core/views.py +++ b/core/views.py @@ -1,5 +1,6 @@ import os import re +from urllib.parse import urljoin import structlog from bs4 import BeautifulSoup @@ -38,6 +39,7 @@ from .htmlhelper import ( modernize_legacy_page, convert_name_to_id, remove_library_boostlook, + modernize_preprocessor_docs, ) from .markdown import process_md from .models import RenderedContent @@ -428,23 +430,35 @@ class StaticContentTemplateView(BaseStaticContentTemplateView): return content +# possible library versions are: boost_1_53_0_beta1, 1_82_0, 1_55_0b1 +BOOST_LIB_PATH_RE = re.compile(r"^(boost_){0,1}([0-9_]*[0-9]+[^/]*)/(.*)") + + +def normalize_boost_doc_path(content_path: str) -> str: + if content_path.startswith(LATEST_RELEASE_URL_PATH_STR): + version = Version.objects.most_recent() + content_path = content_path.replace( + f"{LATEST_RELEASE_URL_PATH_STR}/", f"{version.stripped_boost_url_slug}/" + ) + # Special case for Boost.Process + if content_path == "1_88_0/doc/html/process.html": + content_path = "1_88_0/libs/process/doc/html/index.html" + + # Match versioned library paths + matches = BOOST_LIB_PATH_RE.match(content_path) + if matches: + groups = matches.groups() + if groups and not groups[0]: + content_path = f"boost_{content_path}" + + return f"/archives/{content_path}" + + class DocLibsTemplateView(BaseStaticContentTemplateView): - # possible library versions are: boost_1_53_0_beta1, 1_82_0, 
1_55_0b1 - boost_lib_path_re = re.compile(r"^(boost_){0,1}([0-9_]*[0-9]+[^/]*)/(.*)") # is_iframe_view = False def get_from_s3(self, content_path): - # Fix special case - if content_path == "1_88_0/doc/html/process.html": - content_path = "1_88_0/libs/process/doc/html/index.html" - # perform URL matching/mapping, perhaps extract the version from content_path - matches = self.boost_lib_path_re.match(content_path) - if matches: - groups = matches.groups() - if groups and not groups[0]: - content_path = f"boost_{content_path}" - - legacy_url = f"/archives/{content_path}" + legacy_url = normalize_boost_doc_path(content_path) return super().get_from_s3(legacy_url) def process_content(self, content): @@ -489,7 +503,22 @@ class DocLibsTemplateView(BaseStaticContentTemplateView): soup.find("head").append( soup.new_tag("script", src=f"{STATIC_URL}js/theme_handling.js") ) + if "libs/preprocessor" in self.request.path: + # Temporarily only run this on the preprocessor docs + soup, should_use_modernized_iframe = modernize_preprocessor_docs(soup) + else: + should_use_modernized_iframe = False context["content"] = soup.prettify() + if should_use_modernized_iframe: + modernized_url = reverse( + "modernized_docs", + kwargs={"content_path": self.kwargs["content_path"]}, + ) + return render_to_string( + "docsiframe.html", + {"iframe_url": modernized_url}, + request=self.request, + ) else: # Potentially pass version if needed for HTML modification. 
# We disable plausible to prevent redundant 'about:srcdoc' tracking, @@ -556,6 +585,111 @@ class UserGuideTemplateView(BaseStaticContentTemplateView): return render_to_string("docsiframe.html", context, request=self.request) +class ModernizedDocsView(View): + """Special case view for handling sub-pages of the Boost.Preprocessor docs.""" + + def get(self, request, content_path): + soup, response = self._load_and_transform_html(content_path, request) + if response: + return response # Early return for non-HTML content + + self._inject_base_tag(soup, request) + self._rewrite_links(soup, content_path) + self._inject_script(soup) + + html = str(soup) + return HttpResponse(html, content_type="text/html") + + def _load_and_transform_html(self, content_path, request): + legacy_url = normalize_boost_doc_path(content_path) + try: + result = get_content_from_s3(key=legacy_url) + except ContentNotFoundException: + raise Http404("Not found") + + content = result.get("content") + content_type = result.get("content_type", "") + + if not content: + return None, HttpResponse( + content or "", content_type=content_type or "text/plain" + ) + + html = content.decode(chardet.detect(content)["encoding"]) + + if content_type.startswith("text/x-c"): + soup = self._process_cpp_code(html) + return None, HttpResponse(soup, content_type="text/plain") + + soup = BeautifulSoup(html, "html.parser") + soup = convert_name_to_id(soup) + soup, _ = modernize_preprocessor_docs(soup) + return soup, None + + def _process_cpp_code(self, html): + lines = html.strip().splitlines() + code_block = "\n".join(lines) + soup = BeautifulSoup("", "html.parser") + html = soup.new_tag("html") + head = soup.new_tag("head") + body = soup.new_tag("body") + code = soup.new_tag("code", **{"class": "language-cpp"}) + code.string = code_block + pre = soup.new_tag("pre") + pre.append(code) + body.append(pre) + html.append(head) + html.append(body) + soup.append(html) + return soup + + def _inject_base_tag(self, soup, 
request): + if soup.head and not soup.head.find("base"): + base_path = request.path.rsplit("/", 1)[0] + "/" + base_href = urljoin(request.build_absolute_uri("/"), base_path.lstrip("/")) + if not settings.LOCAL_DEVELOPMENT: + # Slightly hacky, but it's tricky to get this right inside the iframe + base_href = base_href.replace("http://", "https://") + base_tag = soup.new_tag("base", href=base_href) + soup.head.insert(0, base_tag) + + def _inject_script(self, soup): + script_tag = soup.new_tag( + "script", src=f"{settings.STATIC_URL}js/theme_handling.js" + ) + if soup.head: + soup.head.append(script_tag) + + def _rewrite_links(self, soup, content_path): + """Turn anchor tags meant to use framesets into htmx-driven links""" + + def _set_htmx_attrs(tag, _target): + tag["hx-target"] = _target + tag["hx-swap"] = "innerHTML show:none" + + base_content_path = content_path.rsplit("/", 1)[0] + "/" + for a in soup.find_all("a"): + target = a.get("target") + href = a.get("href", "") + + if target in ("_top", "_parent"): + new_path = urljoin(base_content_path, href) + a["href"] = reverse("docs-libs-page", kwargs={"content_path": new_path}) + a["target"] = "_parent" + elif target == "index": + _set_htmx_attrs(a, "#sidebar") + elif target == "desc": + _set_htmx_attrs(a, "#main") + elif not target: + if content_path.endswith("contents.html"): + _set_htmx_attrs(a, "#sidebar") + else: + _set_htmx_attrs(a, "#main") + + if target and a["target"] != "_parent": + del a["target"] + + class ImageView(View): def get(self, request, *args, **kwargs): # TODO: Add caching logic diff --git a/kube/boost/templates/configmap-nginx.yaml b/kube/boost/templates/configmap-nginx.yaml index 64048020..929c0826 100644 --- a/kube/boost/templates/configmap-nginx.yaml +++ b/kube/boost/templates/configmap-nginx.yaml @@ -129,6 +129,7 @@ data: location = /development/separate_compilation.html { return 301 /doc/contributor-guide/design-guide/separate-compilation.html; } location = 
/development/library_metadata.html { return 301 /doc/contributor-guide/requirements/library-metadata.html; } location = /doc/ { return 301 /libraries/; } + location = /libs/ { return 301 /libraries/; } location = /doc/libs/ { return 301 /libraries/; } location = /build/ { return 301 /tools/build/; } location = /more/lib_guide.htm { return 301 /doc/contributor-guide/index.html; } diff --git a/static/css/preprocessing_fixes.css b/static/css/preprocessing_fixes.css new file mode 100644 index 00000000..bc6006e5 --- /dev/null +++ b/static/css/preprocessing_fixes.css @@ -0,0 +1,46 @@ +html, body { + margin: 0; + max-height: 100%; + width: 100%; +} +body { + display: grid; + grid-template-rows: auto 1fr; + grid-template-columns: 1fr; +} +.content-wrapper { + display: grid; + grid-template-columns: 250px 1fr; + height: 100%; +} +#sidebar { + width: 250px; + overflow-y: auto; + border-right: 1px solid #ccc; +} +#sidebar > h4 > a { + color: black; + text-decoration: none; +} +#main { + overflow-y: auto; + padding: 1em; +} +#header { + background: #f0f0f0; + padding: 0.5em; +} + +/* TODO: quick basic code styling here; integrate boostlook instead? */ +pre > code { + display: block; + background-color: #f5f5f5; + color: #232a56; + padding: 1rem; + border-radius: 8px; + font-family: monospace; + font-size: 0.875rem; + overflow-x: auto; + white-space: pre-wrap; + line-height: 1.5; +} diff --git a/templates/docsiframe.html b/templates/docsiframe.html index 5bbbacd8..d5b9b755 100644 --- a/templates/docsiframe.html +++ b/templates/docsiframe.html @@ -2,8 +2,13 @@ {% block content %}