Files
website-v2/core/views.py
Lacey Williams Henschel 825dbc019b Cache asciidoc content in db (Part of #394)
- Add RenderedContent model and related helper methods
- Change StaticContentView `get()` logic to try the cache, then the db, then S3
- Change StaticContentView to update db appropriately
- Refactoring for readability/maintainability
2023-06-09 14:43:05 -07:00

244 lines
8.3 KiB
Python

import os.path
import structlog
import tempfile
from dateutil.parser import parse
from django.conf import settings
from django.core.cache import caches
from django.http import Http404, HttpResponse, HttpResponseNotFound
from django.views.generic import TemplateView
from .boostrenderer import get_body_from_html, get_content_from_s3
from .markdown import process_md
from .models import RenderedContent
from .tasks import adoc_to_html
# Module-level structlog logger; the views in this module emit structured
# key/value events through it.
logger = structlog.get_logger()
class MarkdownTemplateView(TemplateView):
    """Render a Markdown or HTML file found under the local content directory.

    The file is located from the ``content_path`` URL kwarg, relative to
    ``content_dir`` (``settings.BASE_CONTENT``).
    """

    template_name = "markdown_template.html"
    content_dir = settings.BASE_CONTENT

    def _is_inside_content_dir(self, path):
        """Return True when ``path`` normalizes to a location under ``content_dir``.

        ``content_path`` comes from the URL, so ``..`` segments must not be
        allowed to escape the content directory.
        """
        base = os.path.abspath(self.content_dir)
        candidate = os.path.abspath(path)
        try:
            return os.path.commonpath([base, candidate]) == base
        except ValueError:
            # commonpath() raises when the paths cannot be compared (for
            # example, different drives); such a path is not inside the base.
            return False

    def build_path(self):
        """
        Builds the path from URL kwargs

        Returns the filesystem path to serve, or None when there is no
        ``content_path``, when the path would fall outside ``content_dir``,
        or when no matching file exists.
        """
        content_path = self.kwargs.get("content_path")
        if not content_path:
            return None

        # If the request includes the file extension, return that. Existence
        # is checked by get().
        if content_path.endswith((".html", ".md")):
            path = f"{self.content_dir}/{content_path}"
            return path if self._is_inside_content_dir(path) else None

        # Trim any trailing slashes
        content_path = content_path.rstrip("/")

        # Note: The get() method also checks isfile(), but since we need to try
        # multiple paths/extensions, we need to call it here as well.
        # Order matters: markdown file, then HTML file, then index file.
        candidates = (
            f"{self.content_dir}/{content_path}.md",
            f"{self.content_dir}/{content_path}.html",
            f"{self.content_dir}/{content_path}/index.html",
        )
        for path in candidates:
            if self._is_inside_content_dir(path) and os.path.isfile(path):
                return path

        # If we get here, there is nothing else for us to try.
        return None

    def get(self, request, *args, **kwargs):
        """
        Verifies the file and returns the frontmatter and content

        Raises Http404 when no path can be built or the path is not a file.
        """
        path = self.build_path()

        # Avoids a TypeError from os.path.isfile if there is no path
        if not path:
            logger.info(
                "markdown_template_view_no_valid_path",
                content_path=kwargs.get("content_path"),
                status_code=404,
            )
            raise Http404("Page not found")

        if not os.path.isfile(path):
            logger.info(
                "markdown_template_view_no_valid_file",
                content_path=kwargs.get("content_path"),
                path=path,
                status_code=404,
            )
            raise Http404("Post not found")

        context = {}
        context["frontmatter"], context["content"] = process_md(path)
        logger.info(
            "markdown_template_view_success",
            content_path=kwargs.get("content_path"),
            path=path,
            status_code=200,
        )
        return self.render_to_response(context)
class ContentNotFoundException(Exception):
    """Signals that the requested content could not be found."""
class StaticContentTemplateView(TemplateView):
    """Serve static content that originates in S3, with cache and database
    lookups tried first."""

    template_name = "adoc_content.html"

    def get(self, request, *args, **kwargs):
        """Returns static content that originates in S3, but is cached in a couple of
        different places.

        Any valid S3 key to the S3 bucket specified in settings can be returned by
        this view. Pages like the Help page are stored in S3 and rendered via
        this view, for example.

        See the *_static_config.json files for URL mappings to specific S3 keys.

        Responds with a 404 when the content cannot be found in any source.
        """
        content_path = self.kwargs.get("content_path")
        try:
            # Stored on the instance because get_template_names() and
            # get_context_data() read it later in the request.
            self.content_dict = self.get_content(content_path)
        except ContentNotFoundException:
            logger.info(
                "get_content_from_s3_view_not_in_cache",
                content_path=content_path,
                status_code=404,
            )
            return HttpResponseNotFound("Page not found")
        return super().get(request, *args, **kwargs)

    def get_template_names(self):
        """Returns the template name.

        Only asciidoc content is rendered through a template; for any other
        content type an empty list is returned, which render_to_response()
        treats as "send the content directly".
        """
        content_type = self.content_dict.get("content_type")
        if content_type == "text/asciidoc":
            return [self.template_name]
        return []

    def get_context_data(self, **kwargs):
        """Returns the content and content type for the template. In some cases,
        changes the content type."""
        context = super().get_context_data(**kwargs)
        content_type = self.content_dict.get("content_type")
        content = self.content_dict.get("content")
        # Asciidoc has been converted to HTML by the time it is served, so it
        # is reported to the client as HTML.
        if content_type == "text/asciidoc":
            content_type = "text/html"
        context.update({"content": content, "content_type": content_type})
        logger.info(
            "get_content_from_s3_view_success", key=self.kwargs.get("content_path")
        )
        return context

    def render_to_response(self, context, **response_kwargs):
        """Return the HTML response with a template, or just the content directly."""
        if self.get_template_names():
            return super().render_to_response(context, **response_kwargs)
        return HttpResponse(context["content"], content_type=context["content_type"])

    def get_content(self, content_path):
        """Returns content from cache, database, or S3

        The sources are tried in that order. A result found in the database
        or S3 is written back to the cache so that later requests can be
        answered from the first tier.

        Raises ContentNotFoundException when no source has the content.
        """
        static_content_cache = caches["static_content"]
        cache_key = f"static_content_{content_path}"

        result = self.get_from_cache(static_content_cache, cache_key)
        if result is not None:
            return result

        result = self.get_from_database(cache_key)
        if result is None:
            result = self.get_from_s3(content_path, cache_key)
        if result is None:
            logger.info(
                "get_content_from_s3_view_no_valid_object",
                key=content_path,
                status_code=404,
            )
            raise ContentNotFoundException("Content not found")

        # Nothing else in this view populates the cache; without this write
        # the cache lookup above could never succeed. Uses the cache
        # backend's configured default timeout.
        static_content_cache.set(cache_key, result)
        return result

    def get_from_cache(self, static_content_cache, cache_key):
        """Returns the cached value for ``cache_key``, or None if it is missing
        or falsy."""
        return static_content_cache.get(cache_key) or None

    def get_from_database(self, cache_key):
        """Returns a content dict built from the RenderedContent row matching
        ``cache_key``, or None if there is no such row."""
        try:
            content_obj = RenderedContent.objects.get(cache_key=cache_key)
        except RenderedContent.DoesNotExist:
            return None
        return {
            "content": content_obj.content_html,
            "content_type": content_obj.content_type,
        }

    def get_from_s3(self, content_path, cache_key):
        """Fetches ``content_path`` from S3 and returns the result, or None when
        nothing (or empty content) comes back.

        A non-empty result is passed to update_or_create_content() first,
        which may modify it in place.
        """
        result = get_content_from_s3(key=content_path)
        if result and result.get("content"):
            self.update_or_create_content(result, cache_key)
            return result
        return None

    def update_or_create_content(self, result, cache_key):
        """Renders asciidoc content to HTML, saves it to the database, and
        updates ``result`` in place with the rendered content.

        NOTE(review): only asciidoc content is persisted here -- confirm that
        other content types are meant to skip the database.
        """
        content = result.get("content")
        content_type = result.get("content_type")
        last_updated_at_raw = result.get("last_updated_at")

        if content_type == "text/asciidoc":
            content = self.convert_adoc_to_html(content, cache_key)
            last_updated_at = (
                parse(last_updated_at_raw) if last_updated_at_raw else None
            )

            defaults = {"content_html": content, "content_type": content_type}
            # Only set the timestamp when S3 supplied one, so an existing
            # value is not overwritten with nothing.
            if last_updated_at:
                defaults["last_updated_at"] = last_updated_at
            content_obj, created = RenderedContent.objects.update_or_create(
                cache_key=cache_key, defaults=defaults
            )
            logger.info(
                "get_content_from_s3_view_saved_to_db",
                cache_key=cache_key,
                content_type=content_type,
                status_code=200,
                obj_id=content_obj.id,
                created=created,
            )
            result["content"] = content
            result["content_type"] = content_type

    def convert_adoc_to_html(self, content, cache_key):
        """Renders asciidoc content to HTML.

        ``content`` may be str or bytes. ``cache_key`` is accepted but not
        used by this method. Returns the value of get_body_from_html() for
        the converted document.
        """
        # Write the content to a temporary file. delete=False keeps the file
        # on disk after the handle is closed so adoc_to_html can read it.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            if isinstance(content, str):
                content = content.encode()
            temp_file.write(content)

        # Convert only after the file is closed, so the content is flushed.
        try:
            html_content = adoc_to_html(temp_file.name, delete_file=True)
        finally:
            # adoc_to_html is asked to delete the file itself; this covers the
            # case where it raised before doing so.
            if os.path.exists(temp_file.name):
                os.remove(temp_file.name)

        if isinstance(html_content, bytes):
            html_content = html_content.decode("utf-8")

        # Extract only the contents of the body tag that we want from the HTML
        return get_body_from_html(html_content)