Files
website-v2/libraries/tasks.py
Lacey Williams Henschel 0a9e0a2a7f Clear the static content database cache nightly of old files
- Add `created` and `modified` fields to `RenderedContent` models
- Add caching and RenderedContent docs
- Change cache key for library description rendered content
- Add `CLEAR_STATIC_CONTENT_CACHE_DAYS` setting
- Add manager method and task to clear static content cache
- Move task scheduler to main app
- Add daily task to clear rendered content cache
- Use created date and not updated date
2024-01-10 12:23:17 -08:00

85 lines
3.2 KiB
Python

import structlog
from config.celery import app
from core.boostrenderer import get_content_from_s3
from core.htmlhelper import get_library_documentation_urls
from libraries.github import LibraryUpdater
from libraries.models import LibraryVersion
from libraries.utils import get_first_last_day_last_month
from versions.models import Version
logger = structlog.getLogger(__name__)
@app.task
def get_and_store_library_version_documentation_urls_for_version(version_pk):
"""
Store the url paths to the documentation for each library in a given version.
The url paths are retrieved from the `libraries.htm` file in the docs stored in
S3 for the given version.
Retrieve the libraries from the "Libraries Listed Alphabetically" section of the
HTML file. Loop through the unordered list of libraries and save the url path
to the docs for that library to the database.
Background: There are enough small exceptions to how the paths to the docs for each
library are generated, so the easiest thing to do is to access the list of libraries
for a particular release, scrape the url paths to their docs, and save those to the
database.
"""
try:
version = Version.objects.get(pk=version_pk)
except Version.DoesNotExist:
raise
base_path = f"doc/libs/{version.boost_url_slug}/libs/"
key = f"{base_path}libraries.htm"
result = get_content_from_s3(key)
if not result:
raise ValueError(f"Could not get content from S3 for {key}")
content = result["content"]
library_tags = get_library_documentation_urls(content)
library_versions = LibraryVersion.objects.filter(version=version)
for library_name, url_path in library_tags:
try:
# In most cases, the name matches close enough to get the correct object
library_version = library_versions.get(library__name__iexact=library_name)
library_version.documentation_url = f"/{base_path}{url_path}"
library_version.save()
except LibraryVersion.DoesNotExist:
logger.info(
"get_library_version_documentation_urls_version_does_not_exist",
library_name=library_name,
version_slug=version.slug,
)
continue
except LibraryVersion.MultipleObjectsReturned:
logger.info(
"get_library_version_documentation_urls_multiple_objects_returned",
library_name=library_name,
version_slug=version.slug,
)
continue
@app.task
def update_libraries(update_all=False):
"""Update local libraries from GitHub Boost libraries.
Use the LibraryUpdater, which retrieves the active boost libraries from the
Boost GitHub repo, to update the models with the latest information on that
library (repo) along with its issues, pull requests, and related objects
from GitHub.
"""
updater = LibraryUpdater()
if update_all:
updater.update_libraries()
logger.info("libraries_tasks_update_all_libraries_finished")
else:
since, until = get_first_last_day_last_month()
updater.update_libraries(since=since, until=until)
logger.info("libraries_tasks_update_libraries_finished")