mirror of
https://github.com/boostorg/website-v2.git
synced 2026-02-27 17:42:08 +00:00
- Add documentation_url to LibraryVersion table - Retrieve the documentation_url in the view - If the documentation_url is not present, show the Version documentation_url - Hide the "https://" in the template - Add Celery task to retrieve the documentation_urls from the libraries page in S3 and match them to import_library_versions - Add HTML helper function to extract the libraries and docs URLs from the HTML - Add one-time command to import docs urls - Load the documentation_urls in the command that loads all library-versions - Add a min-release option to the command that loads library-versions - Document the new command and changes to the existing command
94 lines
3.3 KiB
Python
94 lines
3.3 KiB
Python
import structlog
|
|
from celery.schedules import crontab
|
|
|
|
from config.celery import app
|
|
from core.boostrenderer import get_content_from_s3
|
|
from core.htmlhelper import get_library_documentation_urls
|
|
from libraries.github import LibraryUpdater
|
|
from libraries.models import LibraryVersion
|
|
from libraries.utils import get_first_last_day_last_month
|
|
from versions.models import Version
|
|
|
|
|
|
logger = structlog.getLogger(__name__)
|
|
|
|
|
|
@app.task
|
|
def get_and_store_library_version_documentation_urls_for_version(version_pk):
|
|
"""
|
|
Store the url paths to the documentation for each library in a given version.
|
|
The url paths are retrieved from the `libraries.htm` file in the docs stored in
|
|
S3 for the given version.
|
|
|
|
Retrieve the libraries from the "Libraries Listed Alphabetically" section of the
|
|
HTML file. Loop through the unordered list of libraries and save the url path
|
|
to the docs for that library to the database.
|
|
|
|
Background: There are enough small exceptions to how the paths to the docs for each
|
|
library are generated, so the easiest thing to do is to access the list of libraries
|
|
for a particular release, scrape the url paths to their docs, and save those to the
|
|
database.
|
|
"""
|
|
try:
|
|
version = Version.objects.get(pk=version_pk)
|
|
except Version.DoesNotExist:
|
|
raise
|
|
|
|
base_path = f"doc/libs/{version.boost_url_slug}/libs/"
|
|
key = f"{base_path}libraries.htm"
|
|
result = get_content_from_s3(key)
|
|
|
|
if not result:
|
|
raise ValueError(f"Could not get content from S3 for {key}")
|
|
|
|
content = result["content"]
|
|
library_tags = get_library_documentation_urls(content)
|
|
library_versions = LibraryVersion.objects.filter(version=version)
|
|
|
|
for library_name, url_path in library_tags:
|
|
try:
|
|
# In most cases, the name matches close enough to get the correct object
|
|
library_version = library_versions.get(library__name__iexact=library_name)
|
|
library_version.documentation_url = f"/{base_path}{url_path}"
|
|
library_version.save()
|
|
except LibraryVersion.DoesNotExist:
|
|
logger.info(
|
|
"get_library_version_documentation_urls_version_does_not_exist",
|
|
library_name=library_name,
|
|
version_slug=version.slug,
|
|
)
|
|
continue
|
|
except LibraryVersion.MultipleObjectsReturned:
|
|
logger.info(
|
|
"get_library_version_documentation_urls_multiple_objects_returned",
|
|
library_name=library_name,
|
|
version_slug=version.slug,
|
|
)
|
|
continue
|
|
|
|
|
|
@app.on_after_configure.connect
|
|
def setup_periodic_tasks(sender, **kwargs):
|
|
# Executes every 5th of the month at 7:30 a.m.
|
|
sender.add_periodic_task(
|
|
crontab(hour=7, minute=30, day_of_month=5),
|
|
update_libraries.s(),
|
|
)
|
|
|
|
|
|
@app.task
|
|
def update_libraries():
|
|
"""Update local libraries from GitHub Boost libraries.
|
|
|
|
Use the LibraryUpdater, which retrieves the active boost libraries from the
|
|
Boost GitHub repo, to update the models with the latest information on that
|
|
library (repo) along with its issues, pull requests, and related objects
|
|
from GitHub.
|
|
|
|
"""
|
|
since, until = get_first_last_day_last_month()
|
|
updater = LibraryUpdater()
|
|
updater.update_libraries(since=since, until=until)
|
|
|
|
logger.info("libraries_tasks_update_libraries_finished")
|