diff --git a/core/boostrenderer.py b/core/boostrenderer.py index d1b2236c..44d6abc4 100644 --- a/core/boostrenderer.py +++ b/core/boostrenderer.py @@ -1,5 +1,6 @@ import boto3 from botocore.exceptions import ClientError +from bs4 import BeautifulSoup import json import os import re @@ -17,6 +18,27 @@ from pygments.formatters.html import HtmlFormatter logger = structlog.get_logger() +def get_body_from_html(html_string: str) -> str: + """Use BeautifulSoup to get the body content from an HTML document, without + the <body> tag. + + We strip out the <body> tag because we want to use our main Boost template, + which includes its own <body> tag. + + Args: + html_string (str): The HTML document as a string + + Returns: + str: The body content as a string (empty if no <body> element is found) + """ + soup = BeautifulSoup(html_string, "html.parser") + body = soup.find("body") + body_content = "" + if body: + body_content = "".join(str(tag) for tag in body.contents) + return body_content + + def get_content_from_s3(key=None, bucket_name=None): """ Get content from S3. Returns the decoded file contents if able diff --git a/core/tests/test_renderer.py b/core/tests/test_renderer.py index 3c156e16..0f716b85 100644 --- a/core/tests/test_renderer.py +++ b/core/tests/test_renderer.py @@ -1,4 +1,12 @@ -from ..boostrenderer import get_content_type, get_s3_keys +from ..boostrenderer import get_body_from_html, get_content_type, get_s3_keys + + +def test_get_body_from_html(): + html_string = ( + "Test

Test

" + ) + body_content = get_body_from_html(html_string) + assert body_content == "

Test

" def test_get_content_type(): diff --git a/core/views.py b/core/views.py index 1b8be2d3..a74eea68 100644 --- a/core/views.py +++ b/core/views.py @@ -8,7 +8,7 @@ from django.http import Http404, HttpResponse, HttpResponseNotFound from django.shortcuts import render from django.views.generic import TemplateView, View -from .boostrenderer import get_content_from_s3 +from .boostrenderer import get_body_from_html, get_content_from_s3 from .markdown import process_md from .tasks import adoc_to_html @@ -163,6 +163,7 @@ class StaticContentTemplateView(View): # Content is a byte string, decode it using UTF-8 encoding html_content = html_content.decode("utf-8") - context = {"content": html_content, "content_type": "text/html"} - + # Extract only the contents of the body tag from the HTML + content = get_body_from_html(html_content) + context = {"content": content, "content_type": "text/html"} return render(request, "adoc_content.html", context) diff --git a/docker-compose.yml b/docker-compose.yml index 828a7b60..1d84c00c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,7 @@ services: command: [ "/bin/bash", "/code/docker/compose-start.sh" ] depends_on: - db + - redis environment: - "LOCAL_DEVELOPMENT=true" - "DOCKER_DIR=/code/docker" diff --git a/requirements.in b/requirements.in index 5c816f80..7319fcb9 100755 --- a/requirements.in +++ b/requirements.in @@ -49,9 +49,10 @@ minio # Packaging pip-tools==6.13.0 -# Markdown and Frontmatter +# Parsing content from external sources (like S3) mistletoe python-frontmatter +beautifulsoup4 #Forum django-machina>=1.2 diff --git a/requirements.txt b/requirements.txt index 95463363..d1835ec2 100755 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,8 @@ async-timeout==4.0.2 # via redis backcall==0.2.0 # via ipython +beautifulsoup4==4.12.2 + # via -r ./requirements.in billiard==3.6.4.0 # via celery black==22.3 @@ -288,6 +290,8 @@ six==1.16.0 # django-rest-auth # fs # python-dateutil +soupsieve==2.4.1 + # via 
beautifulsoup4 sqlparse==0.4.4 # via django stack-data==0.6.2