diff --git a/core/boostrenderer.py b/core/boostrenderer.py
index d1b2236c..44d6abc4 100644
--- a/core/boostrenderer.py
+++ b/core/boostrenderer.py
@@ -1,5 +1,6 @@
import boto3
from botocore.exceptions import ClientError
+from bs4 import BeautifulSoup
import json
import os
import re
@@ -17,6 +18,27 @@ from pygments.formatters.html import HtmlFormatter
logger = structlog.get_logger()
+def get_body_from_html(html_string: str) -> str:
+    """Return the inner HTML of the <body> element of an HTML document.
+
+    The <body> tag itself is left out because we render the content inside
+    our main Boost template, which includes its own <body> tag.
+
+    Args:
+        html_string (str): The HTML document as a string
+
+    Returns:
+        str: The markup inside <body>, or "" when the document has no <body>
+    """
+    soup = BeautifulSoup(html_string, "html.parser")
+    body = soup.find("body")
+    if body is None:
+        return ""
+    # decode_contents() serializes every child node; calling str() on a bare
+    # text or comment child drops entity escaping and the <!-- --> markers.
+    return body.decode_contents()
+
+
def get_content_from_s3(key=None, bucket_name=None):
"""
Get content from S3. Returns the decoded file contents if able
diff --git a/core/tests/test_renderer.py b/core/tests/test_renderer.py
index 3c156e16..0f716b85 100644
--- a/core/tests/test_renderer.py
+++ b/core/tests/test_renderer.py
@@ -1,4 +1,12 @@
-from ..boostrenderer import get_content_type, get_s3_keys
+from ..boostrenderer import get_body_from_html, get_content_type, get_s3_keys
+
+
+def test_get_body_from_html():
+    html_string = (
+        "<html><head><title>Test</title></head><body><h1>Test</h1></body></html>"
+    )
+    body_content = get_body_from_html(html_string)
+    assert body_content == "<h1>Test</h1>"
def test_get_content_type():
diff --git a/core/views.py b/core/views.py
index 1b8be2d3..a74eea68 100644
--- a/core/views.py
+++ b/core/views.py
@@ -8,7 +8,7 @@ from django.http import Http404, HttpResponse, HttpResponseNotFound
from django.shortcuts import render
from django.views.generic import TemplateView, View
-from .boostrenderer import get_content_from_s3
+from .boostrenderer import get_body_from_html, get_content_from_s3
from .markdown import process_md
from .tasks import adoc_to_html
@@ -163,6 +163,7 @@ class StaticContentTemplateView(View):
# Content is a byte string, decode it using UTF-8 encoding
html_content = html_content.decode("utf-8")
- context = {"content": html_content, "content_type": "text/html"}
-
+ # Extract only the contents of the body tag from the HTML
+ content = get_body_from_html(html_content)
+ context = {"content": content, "content_type": "text/html"}
return render(request, "adoc_content.html", context)
diff --git a/docker-compose.yml b/docker-compose.yml
index 828a7b60..1d84c00c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -28,6 +28,7 @@ services:
command: [ "/bin/bash", "/code/docker/compose-start.sh" ]
depends_on:
- db
+ - redis
environment:
- "LOCAL_DEVELOPMENT=true"
- "DOCKER_DIR=/code/docker"
diff --git a/requirements.in b/requirements.in
index 5c816f80..7319fcb9 100755
--- a/requirements.in
+++ b/requirements.in
@@ -49,9 +49,10 @@ minio
# Packaging
pip-tools==6.13.0
-# Markdown and Frontmatter
+# Parsing content from external sources (like S3)
mistletoe
python-frontmatter
+beautifulsoup4
#Forum
django-machina>=1.2
diff --git a/requirements.txt b/requirements.txt
index 95463363..d1835ec2 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,8 @@ async-timeout==4.0.2
# via redis
backcall==0.2.0
# via ipython
+beautifulsoup4==4.12.2
+ # via -r ./requirements.in
billiard==3.6.4.0
# via celery
black==22.3
@@ -288,6 +290,8 @@ six==1.16.0
# django-rest-auth
# fs
# python-dateutil
+soupsieve==2.4.1
+ # via beautifulsoup4
sqlparse==0.4.4
# via django
stack-data==0.6.2