mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Clear the static content database cache nightly of old files
- Add `created` and `modified` fields to `RenderedContent` models - Add caching and RenderedContent docs - Change cache key for library description rendered content - Add `CLEAR_STATIC_CONTENT_CACHE_DAYS` setting - Add manager method and task to clear static content cache - Move task scheduler to main app - Add daily task to clear rendered content cache - Use created date and not updated date
This commit is contained in:
committed by
Lacey Henschel
parent
330d53426d
commit
0a9e0a2a7f
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
from celery.schedules import crontab
|
||||
|
||||
|
||||
# set the default Django settings module for the 'celery' program.
|
||||
@@ -21,3 +22,19 @@ app.autodiscover_tasks()
|
||||
@app.task(bind=True)
|
||||
def debug_task(self):
|
||||
print(f"Request: {self.request!r}")
|
||||
|
||||
|
||||
# Schedule Celery tasks
|
||||
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the project's periodic tasks once the Celery app is configured.

    ``sender`` is the object the schedule entries are added to. Tasks are
    referenced by dotted name so this module does not have to import the task
    modules; each name is wrapped with ``sender.signature`` because
    ``add_periodic_task`` expects a signature object rather than a bare string.
    """
    # Update library data from GitHub. Executes daily at 7:05 AM.
    sender.add_periodic_task(
        crontab(hour=7, minute=5),
        sender.signature("libraries.tasks.update_libraries"),
    )

    # Clear the static content database cache. Executes daily at 4:05 AM.
    sender.add_periodic_task(
        crontab(hour=4, minute=5),
        sender.signature("core.tasks.clear_static_content_cache"),
    )
|
||||
|
||||
@@ -264,6 +264,9 @@ CACHES = {
|
||||
},
|
||||
}
|
||||
|
||||
# Default interval by which to clear the static content cache
|
||||
CLEAR_STATIC_CONTENT_CACHE_DAYS = 7
|
||||
|
||||
# Mailman API credentials
|
||||
MAILMAN_REST_API_URL = env("MAILMAN_REST_API_URL", default="http://localhost:8001")
|
||||
MAILMAN_REST_API_USER = env("MAILMAN_REST_API_USER", default="restadmin")
|
||||
|
||||
@@ -3,10 +3,29 @@ import structlog
|
||||
from django.core.cache import caches
|
||||
from django.db import models
|
||||
|
||||
from django.utils import timezone
|
||||
import datetime
|
||||
from django.conf import settings
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class RenderedContentManager(models.Manager):
|
||||
def clear_cache_by_cache_type_and_date(
    self,
    cache_type="static_content_",
    older_than_days=None,
):
    """Delete rows of one cache type whose ``created`` date is past a cutoff.

    Args:
        cache_type: Prefix that ``cache_key`` must start with for a row to
            be eligible for deletion.
        older_than_days: Rows created at least this many days ago are
            deleted. When ``None`` (the default), the value of
            ``settings.CLEAR_STATIC_CONTENT_CACHE_DAYS`` is read at call
            time, so settings overrides are honored.
    """
    # Resolve the default lazily: a default written in the signature would be
    # frozen at import time and ignore later changes to the setting.
    if older_than_days is None:
        older_than_days = settings.CLEAR_STATIC_CONTENT_CACHE_DAYS

    older_than = timezone.now() - datetime.timedelta(days=older_than_days)
    deleted_count, _ = self.filter(
        cache_key__startswith=cache_type, created__lte=older_than
    ).delete()
    logger.info(
        "rendered_content_manager_clear_cache_by_cache_type_and_date",
        cache_type=cache_type,
        count=deleted_count,
    )
|
||||
|
||||
def clear_cache_by_content_type(self, content_type):
|
||||
"""Clears the static content cache of all rendered content of a given type."""
|
||||
cache = caches["static_content"]
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
# Generated by Django 4.2.2 on 2024-01-05 22:28
|
||||
|
||||
from django.db import migrations
|
||||
import django.utils.timezone
|
||||
import django_extensions.db.fields
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Adds the `created` and `modified` timestamp fields to the
    # `renderedcontent` model.

    dependencies = [
        ("core", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="renderedcontent",
            name="created",
            field=django_extensions.db.fields.CreationDateTimeField(
                auto_now_add=True,
                default=django.utils.timezone.now,
                verbose_name="created",
            ),
            # NOTE(review): with preserve_default=False the `default` above is
            # presumably a one-off value for rows that already exist and is
            # not kept on the field afterwards; confirm against Django docs.
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="renderedcontent",
            name="modified",
            field=django_extensions.db.fields.ModificationDateTimeField(
                auto_now=True, verbose_name="modified"
            ),
        ),
    ]
|
||||
@@ -1,16 +1,21 @@
|
||||
from django.db import models
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from django_extensions.db.models import TimeStampedModel
|
||||
|
||||
from .managers import RenderedContentManager
|
||||
|
||||
|
||||
class RenderedContent(models.Model):
|
||||
class RenderedContent(TimeStampedModel):
|
||||
"""Stores a copy of rendered content. Generally, this content is retrieved
|
||||
from the S3 buckets and, if necessary, converted to HTML.
|
||||
|
||||
This model is intended to be used as a cache. If the content is not found,
|
||||
it will be retrieved from S3 and stored in this model. If the content is
|
||||
found, it will be returned from this model."""
|
||||
found, it will be returned from this model.
|
||||
|
||||
TimeStampedModel adds `created` and `modified` fields:
|
||||
https://django-extensions.readthedocs.io/en/latest/model_extensions.html
|
||||
"""
|
||||
|
||||
cache_key = models.CharField(
|
||||
max_length=255,
|
||||
|
||||
@@ -34,6 +34,14 @@ def clear_rendered_content_cache_by_content_type(content_type):
|
||||
RenderedContent.objects.delete_by_content_type(content_type)
|
||||
|
||||
|
||||
@shared_task
def clear_static_content_cache():
    """Celery task that delegates to the RenderedContent manager to clear
    cached rows whose key uses the ``static_content_`` prefix."""
    manager = RenderedContent.objects
    manager.clear_cache_by_cache_type_and_date(cache_type="static_content_")
|
||||
|
||||
|
||||
@shared_task
|
||||
def refresh_content_from_s3(s3_key, cache_key):
|
||||
"""Calls S3 with the s3_key, then saves the result to the
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import datetime
|
||||
from model_bakery import baker
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import caches
|
||||
from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
|
||||
from ..models import RenderedContent
|
||||
|
||||
@@ -57,3 +60,21 @@ def test_delete_by_cache_key():
|
||||
|
||||
assert RenderedContent.objects.filter(cache_key="keep").exists()
|
||||
assert not RenderedContent.objects.filter(cache_key="clear").exists()
|
||||
|
||||
|
||||
def test_clear_cache_by_cache_type_and_date(rendered_content):
    """A back-dated entry is deleted while the fixture's entry is kept."""
    cache_type = "cache-key"
    stale_key = f"{cache_type}_old"
    retention_days = settings.CLEAR_STATIC_CONTENT_CACHE_DAYS

    # Create an entry, then back-date it one day beyond the retention window.
    stale_created = timezone.now() - datetime.timedelta(days=retention_days + 1)
    stale_entry = baker.make("core.RenderedContent", cache_key=stale_key)
    stale_entry.created = stale_created
    stale_entry.save()

    count_before = RenderedContent.objects.count()
    RenderedContent.objects.clear_cache_by_cache_type_and_date(cache_type=cache_type)
    count_after = RenderedContent.objects.count()

    # Exactly one row, the back-dated one, should be gone.
    assert count_after == count_before - 1
    assert not RenderedContent.objects.filter(cache_key=stale_key).exists()
    assert RenderedContent.objects.filter(cache_key=rendered_content.cache_key).exists()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Documentation for the Boost Website
|
||||
|
||||
- [API Documentation](./api.md) - We don't have many API endpoints, but the ones we do have are documented here
|
||||
- [Caching and the `RenderedContent` model](./caching_rendered_content.md)
|
||||
- [Dependency Management](./dependencies.md)
|
||||
- [Development Setup Notes](./development_setup_notes.md)
|
||||
- [Environment Variables](./env_vars.md)
|
||||
|
||||
12
docs/caching_rendered_content.md
Normal file
12
docs/caching_rendered_content.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Caching and the `RenderedContent` model
|
||||
|
||||
This model is mostly used as a database cache or backup for data that is retrieved from GitHub or from the S3 buckets.
|
||||
|
||||
See [Static Content](./static_content.md) for more information about retrieving static content from S3.
|
||||
|
||||
Usage:
|
||||
|
||||
- Cache static content (like asciidoc content, library documentation, the help pages, anything that is rendered from S3). The `cache_key` field will be prefixed with `static_content_`.
|
||||
- A Celery task, scheduled to run daily, deletes rows from this database cache that are older than `CLEAR_STATIC_CONTENT_CACHE_DAYS` days (7 by default).
|
||||
- Cache a copy of the library description (from the library asciidoc or other readme file). This enables us to load a library description even if the GitHub API goes down. The `cache_key` field will be prefixed with `library_description_`. Because these descriptions are primarily for past versions, they will not update, they will not be deleted from the database cache, and there is no need to retrieve them from GitHub fresh every time.
|
||||
- Store a copy of the release notes for each Boost version. Because the release notes are for past versions, they will not update, they will not be deleted from the database cache, and there is no need to retrieve them from GitHub fresh every time. The `cache_key` field will be prefixed with `release_notes_`.
|
||||
@@ -54,3 +54,9 @@ Take a look at this sample `{env}_static_config.json` file:
|
||||
- `/site/index.html`
|
||||
|
||||
We first try to retrieve the static content using the exact S3 key specified in the site-to-S3 mapping. If we can't find the content using that key, we will try alternative S3 keys based on the `site_path` and `s3_path` properties in the `{env}_static_config.json` file.
|
||||
|
||||
## Caching
|
||||
|
||||
See [Caching and the `RenderedContent` model](./caching_rendered_content.md) for how Django-side caching is handled.
|
||||
|
||||
Caching is also handled via the Fastly CDN.
|
||||
|
||||
@@ -148,7 +148,7 @@ class Library(models.Model):
|
||||
|
||||
# Try to get the content from the cache first
|
||||
static_content_cache = caches["static_content"]
|
||||
cache_key = f"static_content_{self.github_repo}_{tag}"
|
||||
cache_key = f"library_description_{self.github_repo}_{tag}"
|
||||
cached_result = static_content_cache.get(cache_key)
|
||||
if cached_result:
|
||||
return cached_result
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import structlog
|
||||
from celery.schedules import crontab
|
||||
|
||||
from config.celery import app
|
||||
from core.boostrenderer import get_content_from_s3
|
||||
@@ -66,15 +65,6 @@ def get_and_store_library_version_documentation_urls_for_version(version_pk):
|
||||
continue
|
||||
|
||||
|
||||
@app.on_after_configure.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
# Executes daily at 7:05 AM
|
||||
sender.add_periodic_task(
|
||||
crontab(hour=7, minute=5),
|
||||
update_libraries.s(),
|
||||
)
|
||||
|
||||
|
||||
@app.task
|
||||
def update_libraries(update_all=False):
|
||||
"""Update local libraries from GitHub Boost libraries.
|
||||
|
||||
Reference in New Issue
Block a user