Files
website-v2/libraries/tasks.py
2023-11-08 16:31:01 -08:00

95 lines
3.5 KiB
Python

import structlog
from celery.schedules import crontab
from config.celery import app
from core.boostrenderer import get_content_from_s3
from core.htmlhelper import get_library_documentation_urls
from libraries.github import LibraryUpdater
from libraries.models import LibraryVersion
from libraries.utils import get_first_last_day_last_month
from versions.models import Version
logger = structlog.getLogger(__name__)
@app.task
def get_and_store_library_version_documentation_urls_for_version(version_pk):
"""
Store the url paths to the documentation for each library in a given version.
The url paths are retrieved from the `libraries.htm` file in the docs stored in
S3 for the given version.
Retrieve the libraries from the "Libraries Listed Alphabetically" section of the
HTML file. Loop through the unordered list of libraries and save the url path
to the docs for that library to the database.
Background: There are enough small exceptions to how the paths to the docs for each
library are generated, so the easiest thing to do is to access the list of libraries
for a particular release, scrape the url paths to their docs, and save those to the
database.
"""
try:
version = Version.objects.get(pk=version_pk)
except Version.DoesNotExist:
raise
base_path = f"doc/libs/{version.boost_url_slug}/libs/"
key = f"{base_path}libraries.htm"
result = get_content_from_s3(key)
if not result:
raise ValueError(f"Could not get content from S3 for {key}")
content = result["content"]
library_tags = get_library_documentation_urls(content)
library_versions = LibraryVersion.objects.filter(version=version)
for library_name, url_path in library_tags:
try:
# In most cases, the name matches close enough to get the correct object
library_version = library_versions.get(library__name__iexact=library_name)
library_version.documentation_url = f"/{base_path}{url_path}"
library_version.save()
except LibraryVersion.DoesNotExist:
logger.info(
"get_library_version_documentation_urls_version_does_not_exist",
library_name=library_name,
version_slug=version.slug,
)
continue
except LibraryVersion.MultipleObjectsReturned:
logger.info(
"get_library_version_documentation_urls_multiple_objects_returned",
library_name=library_name,
version_slug=version.slug,
)
continue
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
# Executes daily at 7:05 AM
sender.add_periodic_task(
crontab(hour=7, minute=5),
update_libraries.s(),
)
@app.task
def update_libraries(update_all=False):
"""Update local libraries from GitHub Boost libraries.
Use the LibraryUpdater, which retrieves the active boost libraries from the
Boost GitHub repo, to update the models with the latest information on that
library (repo) along with its issues, pull requests, and related objects
from GitHub.
"""
updater = LibraryUpdater()
if update_all:
updater.update_libraries()
logger.info("libraries_tasks_update_all_libraries_finished")
else:
since, until = get_first_last_day_last_month()
updater.update_libraries(since=since, until=until)
logger.info("libraries_tasks_update_libraries_finished")