diff --git a/docs/syncing_data_with_github.md b/docs/syncing_data_with_github.md index 2082f149..dd1886a7 100644 --- a/docs/syncing_data_with_github.md +++ b/docs/syncing_data_with_github.md @@ -21,15 +21,15 @@ Not all tags are official GitHub **Releases**, however, and this impacts where w To retrieve releases and tags, run: ```bash -./manage.py import_releases +./manage.py import_versions ``` -This will: +Note the command enqueues a celery task rather than running synchronously. The task will: -- Delete existing Versions and LibraryVersions +- Delete existing Versions and LibraryVersions if you pass `--delete-versions` to the command - Retrieve tags and releases from the Boost GitHub repo - Create new Versions for each tag and release that is not a beta or rc release -- Create a new LibraryVersion for each Library **but not for historical versions** +- Create a new LibraryVersion for each Library (including for historical versions unless you pass `--new`) ## Library data diff --git a/libraries/constants.py b/libraries/constants.py index ae30e027..0bc3a066 100644 --- a/libraries/constants.py +++ b/libraries/constants.py @@ -288,6 +288,14 @@ LIBRARY_DOCS_EXCEPTIONS = { ], } +# Mapping for duplicate library categories. If a library has one of the keys +# in this mapping as its category, the associated canonical category should be used. 
+# key: Duplicate category's Category.name +# value: Category.name of the canonical category +CATEGORY_OVERRIDES = { + "Container": "Containers", +} + # This constant is for library-version docs that we know are missing LIBRARY_DOCS_MISSING = { diff --git a/libraries/github.py b/libraries/github.py index 6014c3a3..09eaae87 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -17,6 +17,7 @@ from django.db import transaction from django.utils import dateparse, timezone from versions.models import Version +from .constants import CATEGORY_OVERRIDES from .models import ( Category, Commit, @@ -163,10 +164,7 @@ class LibraryUpdater: """ def __init__(self, client=None, token=None): - if client: - self.client = client - else: - self.client = GithubAPIClient() + self.client = client or GithubAPIClient() self.api = self.client.initialize_api(token=token) self.parser = GithubDataParser() self.logger = structlog.get_logger() @@ -279,6 +277,7 @@ class LibraryUpdater: obj.categories.clear() for cat_name in categories: + cat_name = CATEGORY_OVERRIDES.get(cat_name, cat_name) cat, _ = Category.objects.get_or_create(name=cat_name) obj.categories.add(cat) diff --git a/libraries/tests/test_github.py b/libraries/tests/test_github.py index 167d8040..142e652c 100644 --- a/libraries/tests/test_github.py +++ b/libraries/tests/test_github.py @@ -175,6 +175,14 @@ def test_update_categories(library, library_updater): assert Category.objects.filter(name="Test").exists() assert library.categories.filter(name="Test").exists() + # Ensure category overrides respected + assert Category.objects.filter(name="Containers").exists() is False + assert Category.objects.filter(name="Container").exists() is False + library_updater.update_categories(library, ["Container"]) # overridden + library.refresh_from_db() + assert Category.objects.filter(name="Containers").exists() is True + assert Category.objects.filter(name="Container").exists() is False + def test_update_issues_new( tp, library, 
github_api_repo_issues_response, library_updater diff --git a/versions/management/commands/import_versions.py b/versions/management/commands/import_versions.py index 0115f145..09086609 100644 --- a/versions/management/commands/import_versions.py +++ b/versions/management/commands/import_versions.py @@ -23,6 +23,8 @@ def command( verbose (bool): Enable verbose output (show logging statements) delete_versions (bool): If True, deletes all existing Version instances before importing. + new (bool): If True, only imports versions that do not already exist in + the database. token (str): Github API token, if you need to use something other than the setting. """