Files
website-v2/core/tasks.py
2025-10-20 09:46:06 -07:00

118 lines
3.7 KiB
Python

import structlog
from celery import shared_task
from dateutil.parser import parse
from django.core.cache import caches
from core.asciidoc import convert_adoc_to_html
from .boostrenderer import get_content_from_s3
from .constants import RENDERED_CONTENT_BATCH_DELETE_SIZE
from .models import RenderedContent
# Module-level structlog logger used by the tasks defined in this file.
logger = structlog.get_logger()
@shared_task
def clear_rendered_content_cache_by_cache_key(cache_key):
    """Remove one piece of rendered content, identified by its cache key.

    The key is first dropped from the ``static_content`` cache, then the
    RenderedContent manager's ``delete_by_cache_key`` is called for the
    same key.
    """
    caches["static_content"].delete(cache_key)
    RenderedContent.objects.delete_by_cache_key(cache_key)
@shared_task
def clear_rendered_content_cache_by_content_type(content_type):
    """Remove all rendered content of one content type.

    Both steps are delegated to the RenderedContent manager: the cache
    entries for ``content_type`` are cleared first, then the matching
    objects are deleted.
    """
    manager = RenderedContent.objects
    # Keep this order: clear the cache before deleting the objects.
    manager.clear_cache_by_content_type(content_type)
    manager.delete_by_content_type(content_type)
@shared_task
def clear_static_content_cache():
    """Clear the static content cache through the RenderedContent manager.

    Delegates to ``clear_cache_by_cache_type_and_date`` with the
    ``"static_content_"`` cache type; no other arguments are passed.
    """
    static_cache_type = "static_content_"
    RenderedContent.objects.clear_cache_by_cache_type_and_date(
        cache_type=static_cache_type
    )
@shared_task
def refresh_content_from_s3(s3_key, cache_key):
    """Fetch content from S3 and refresh its RenderedContent and cache entry.

    Calls ``get_content_from_s3`` with ``s3_key``. When the result holds
    non-empty content, ``text/asciidoc`` content is converted to HTML, the
    existing cache entry and RenderedContent object for ``cache_key`` are
    cleared, and the fresh content is saved to the database and written to
    the ``static_content`` cache. When the fetch yields a falsy result or
    empty content, the task does nothing.

    Args:
        s3_key: Key passed to ``get_content_from_s3``.
        cache_key: Cache key of the RenderedContent to refresh.
    """
    content_dict = get_content_from_s3(key=s3_key)
    # Check the fetch result itself before reading from it, so a None/empty
    # result is skipped instead of raising AttributeError on ``.get``.
    if not content_dict:
        return

    content = content_dict.get("content")
    if not content:
        return

    content_type = content_dict.get("content_type")
    if content_type == "text/asciidoc":
        content = convert_adoc_to_html(content)

    last_updated_at_raw = content_dict.get("last_updated_at")
    last_updated_at = parse(last_updated_at_raw) if last_updated_at_raw else None

    # Clear the cache because we're going to update it. Called directly
    # (not via ``.delay``), so it runs inline before the save below.
    clear_rendered_content_cache_by_cache_key(cache_key)

    # Update the rendered content.
    save_rendered_content(
        cache_key, content_type, content, last_updated_at=last_updated_at
    )

    # Cache the refreshed rendered content.
    cache = caches["static_content"]
    cache.set(cache_key, {"content": content, "content_type": content_type})
@shared_task
def save_rendered_content(cache_key, content_type, content_html, last_updated_at=None):
    """Create or update the RenderedContent object for a cache key.

    The lookup uses only the first 255 characters of ``cache_key``.
    ``content_type`` and ``content_html`` are always written;
    ``last_updated_at`` is written only when a truthy value is supplied.
    The outcome is logged with the untruncated key, the object id, and
    whether a new object was created.
    """
    field_values = dict(content_type=content_type, content_html=content_html)
    if last_updated_at:
        field_values.update(last_updated_at=last_updated_at)

    # presumably 255 matches the cache_key column length; verify against the model
    lookup_key = cache_key[:255]
    record, was_created = RenderedContent.objects.update_or_create(
        cache_key=lookup_key, defaults=field_values
    )

    logger.info(
        "content_saved_to_rendered_content",
        cache_key=cache_key,
        content_type=content_type,
        status_code=200,
        obj_id=record.id,
        obj_created=was_created,
    )
@shared_task
def delete_all_rendered_content():
    """Delete every RenderedContent object in batches, then reset its id sequence.

    Objects are removed ``RENDERED_CONTENT_BATCH_DELETE_SIZE`` primary keys
    at a time to avoid locking the entire table. Once nothing is left, the
    ``<db_table>_id_seq`` sequence is restarted at 1.

    Returns:
        The sum of the counts reported by each batch ``delete()`` call.
    """
    from django.db import connection

    deleted_count = 0
    # The sliced queryset is evaluated by the truth test; an empty batch ends
    # the loop. The same (lazy) queryset is then handed to ``pk__in``.
    while pks := RenderedContent.objects.values_list("pk", flat=True)[
        :RENDERED_CONTENT_BATCH_DELETE_SIZE
    ]:
        batch_size, _ = RenderedContent.objects.filter(pk__in=pks).delete()
        deleted_count += batch_size
        logger.info(f"batch deleted {batch_size=} {deleted_count=}")

    # Reset auto-increment sequence to 1
    reset_sql = (
        f"ALTER SEQUENCE {RenderedContent._meta.db_table}_id_seq RESTART WITH 1"
    )
    with connection.cursor() as cursor:
        cursor.execute(reset_sql)

    logger.info("all_rendered_content_deleted", total_count=deleted_count)
    return deleted_count