mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Extract content of HTML body from rendered asciidoc content (Part of #394)
- Make web service depend on redis - Add function to extract body from HTML content - In view, extract body before returning converted asciidoc in response
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -17,6 +18,27 @@ from pygments.formatters.html import HtmlFormatter
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
def get_body_from_html(html_string: str) -> str:
    """Return the inner HTML of the <body> element of an HTML document.

    The <body> tag itself is stripped because the content is embedded in the
    main Boost template, which includes its own <body> tag.

    Args:
        html_string (str): The HTML document as a string

    Returns:
        str: The markup inside <body>, or an empty string when the document
            has no <body> element
    """
    soup = BeautifulSoup(html_string, "html.parser")
    body = soup.find("body")
    if body is None:
        return ""
    # Serialize through decode_contents() rather than str() on each child:
    # str() on a bare text node or comment returns its raw text, which drops
    # entity escaping ("&lt;" becomes "<") and the <!-- --> comment markers.
    return body.decode_contents()
|
||||
|
||||
|
||||
def get_content_from_s3(key=None, bucket_name=None):
|
||||
"""
|
||||
Get content from S3. Returns the decoded file contents if able
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
from ..boostrenderer import get_content_type, get_s3_keys
|
||||
from ..boostrenderer import get_body_from_html, get_content_type, get_s3_keys
|
||||
|
||||
|
||||
def test_get_body_from_html():
    """Only the markup inside <body> is returned, without the tag itself."""
    expected = "<h1>Test</h1>"
    document = (
        "<html><head><title>Test</title></head><body><h1>Test</h1></body></html>"
    )
    assert get_body_from_html(document) == expected
|
||||
|
||||
|
||||
def test_get_content_type():
|
||||
|
||||
@@ -8,7 +8,7 @@ from django.http import Http404, HttpResponse, HttpResponseNotFound
|
||||
from django.shortcuts import render
|
||||
from django.views.generic import TemplateView, View
|
||||
|
||||
from .boostrenderer import get_content_from_s3
|
||||
from .boostrenderer import get_body_from_html, get_content_from_s3
|
||||
from .markdown import process_md
|
||||
from .tasks import adoc_to_html
|
||||
|
||||
@@ -163,6 +163,7 @@ class StaticContentTemplateView(View):
|
||||
# Content is a byte string, decode it using UTF-8 encoding
|
||||
html_content = html_content.decode("utf-8")
|
||||
|
||||
context = {"content": html_content, "content_type": "text/html"}
|
||||
|
||||
# Extract only the contents of the body tag from the HTML
|
||||
content = get_body_from_html(html_content)
|
||||
context = {"content": content, "content_type": "text/html"}
|
||||
return render(request, "adoc_content.html", context)
|
||||
|
||||
@@ -28,6 +28,7 @@ services:
|
||||
command: [ "/bin/bash", "/code/docker/compose-start.sh" ]
|
||||
depends_on:
|
||||
- db
|
||||
- redis
|
||||
environment:
|
||||
- "LOCAL_DEVELOPMENT=true"
|
||||
- "DOCKER_DIR=/code/docker"
|
||||
|
||||
@@ -49,9 +49,10 @@ minio
|
||||
# Packaging
|
||||
pip-tools==6.13.0
|
||||
|
||||
# Markdown and Frontmatter
|
||||
# Parsing content from external sources (like S3)
|
||||
mistletoe
|
||||
python-frontmatter
|
||||
beautifulsoup4
|
||||
|
||||
#Forum
|
||||
django-machina>=1.2
|
||||
|
||||
@@ -16,6 +16,8 @@ async-timeout==4.0.2
|
||||
# via redis
|
||||
backcall==0.2.0
|
||||
# via ipython
|
||||
beautifulsoup4==4.12.2
|
||||
# via -r ./requirements.in
|
||||
billiard==3.6.4.0
|
||||
# via celery
|
||||
black==22.3
|
||||
@@ -288,6 +290,8 @@ six==1.16.0
|
||||
# django-rest-auth
|
||||
# fs
|
||||
# python-dateutil
|
||||
soupsieve==2.4.1
|
||||
# via beautifulsoup4
|
||||
sqlparse==0.4.4
|
||||
# via django
|
||||
stack-data==0.6.2
|
||||
|
||||
Reference in New Issue
Block a user