#!/bin/bash
# Expected to be run from within the boost super-repo.
# Loops through all the boost-x.x.0 tags and runs
# `dist/bin/boostdep --list-dependencies --track-sources` on each of them,
# appending the output to the file given on the command line.
#
# Usage: get_deps.sh <output_file>
# Ex:    get_deps.sh /tmp/output.txt

# Exit on any error
set -e

# Exit on undefined variable
set -u

# Print commands as they execute (debug mode)
set -x

# Exit on pipe failures
set -o pipefail

# Fail with a readable message instead of the bare "unbound variable" error
# that `set -u` would produce when the argument is missing.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <output_file>" >&2
    exit 1
fi

output_file=$1
echo "Creating $output_file"
# Truncate (or create) the output file. Quoted so that paths containing
# spaces or glob characters are handled correctly.
: > "$output_file"

# Loop through all tags of the form "boost-x.x.0"
for tag in $(git tag | grep -E 'boost-[0-9]+\.[0-9]+\.0$'); do
    git checkout "$tag" --force
    git submodule update --init --force
    # -d recurses through directories, -ff (two f's) also deletes files in
    # submodules, -e dist keeps the previously built boostdep executable.
    git clean -dff -e dist
    echo "Dependencies for version $tag" | tee -a "$output_file"
    # Best effort: with pipefail a boostdep failure fails the pipeline, so
    # `|| continue` moves on to the next tag instead of aborting via `set -e`.
    dist/bin/boostdep --list-dependencies --track-sources | tee -a "$output_file" || continue
done
def get_artifacts(self, owner="", repo_slug="", name=None):
    """Return the artifact listing for a repository from the GitHub API.

    :param owner: str, the repo owner; falls back to ``self.owner`` when empty
    :param repo_slug: str, the repo slug; falls back to ``self.repo_slug``
        when empty
    :param name: str, when given, only artifacts with this exact name are
        requested (sent as the ``name`` query parameter)
    :return: dict, the decoded JSON response body, or None when the API
        does not answer with HTTP 200 (the status code is logged)
    """
    owner = owner or self.owner
    repo_slug = repo_slug or self.repo_slug
    url = f"https://api.github.com/repos/{owner}/{repo_slug}/actions/artifacts"
    params = {"name": name} if name else {}
    headers = {"accept": "application/vnd.github+json"}
    # Authenticate when a token is configured so the request is not subject
    # to the (much lower) unauthenticated rate limit.
    if self.token:
        headers["Authorization"] = f"Bearer {self.token}"
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    if response.status_code != 200:
        logger.error(
            "Error while fetching artifacts.", status_code=response.status_code
        )
        return None
    return response.json()


def get_artifact_content(self, url):
    """Download an artifact archive and return its first file as text.

    :param url: str, the artifact's ``archive_download_url``
    :return: str, the decoded content of the first member of the zip
        archive, or an empty string when the download does not answer with
        HTTP 200 or the archive contains no files (both cases are logged)
    """
    headers = {"accept": "application/vnd.github+json"}
    # Only send credentials when we actually have some; a literal
    # "Bearer None" header is never valid.
    if self.token:
        headers["Authorization"] = f"Bearer {self.token}"
    resp = requests.get(url, headers=headers, timeout=60)
    if resp.status_code != 200:
        logger.error(
            "Error while fetching artifact file.", status_code=resp.status_code
        )
        return ""
    # The context manager guarantees the archive is closed after reading.
    with ZipFile(BytesIO(resp.content)) as archive:
        members = archive.infolist()
        if not members:
            logger.error("Error while fetching artifact file: archive is empty.")
            return ""
        return archive.read(members[0]).decode()
+ +**Example** + +```bash +./manage.py update_library_version_dependencies +``` + +**Options** + +| Options | Format | Description | +|----------------------|--------|--------------------------------------------------------------| +| `--token` | string | Pass a GitHub API token. If not passed, will use the value in `settings.GITHUB_TOKEN`. | +| `--clean` | bool | If passed, existing dependencies in the M2M will be cleared before reinserting them. | +| `--owner` | string | The repo owner. Defaults to "boostorg", which is correct in most cases but can be useful to specify for testing. | diff --git a/libraries/admin.py b/libraries/admin.py index e79bc0b1..0252cccf 100644 --- a/libraries/admin.py +++ b/libraries/admin.py @@ -337,7 +337,7 @@ class LibraryVersionAdmin(admin.ModelAdmin): ordering = ["library__name", "-version__name"] search_fields = ["library__name", "version__name"] change_list_template = "admin/libraryversion_change_list.html" - autocomplete_fields = ["authors", "maintainers"] + autocomplete_fields = ["authors", "maintainers", "dependencies"] def get_urls(self): urls = super().get_urls() diff --git a/libraries/github.py b/libraries/github.py index 3fe6211e..10c171d5 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -30,7 +30,7 @@ from .models import ( ) from core.githubhelper import GithubAPIClient, GithubDataParser -from .utils import generate_fake_email, parse_date +from .utils import generate_fake_email, parse_boostdep_artifact, parse_date logger = structlog.get_logger() @@ -172,8 +172,7 @@ class LibraryUpdater: """ def __init__(self, client=None, token=None): - self.client = client or GithubAPIClient() - self.api = self.client.initialize_api(token=token) + self.client = client or GithubAPIClient(token=token) self.parser = GithubDataParser() self.logger = structlog.get_logger() @@ -587,3 +586,46 @@ class LibraryUpdater: if gh_author["html_url"]: author.github_profile_url = gh_author["html_url"] author.save(update_fields=["avatar_url", 
def fetch_most_recent_boost_dep_artifact_content(self, owner=""):
    """Return the text content of the newest "boost-dep-artifact" artifact.

    owner: The repo owner passed through to the client's ``get_artifacts``.

    Returns the value of the client's ``get_artifact_content`` for the
    newest artifact, or None when no artifact is listed or the newest one
    is expired (both cases are logged).
    """
    # get artifacts with the name "boost-dep-artifact"
    listing = self.client.get_artifacts(
        owner=owner,
        repo_slug="website-v2",
        name="boost-dep-artifact",
    )
    candidates = listing.get("artifacts", None) if listing else None
    if not candidates:
        logger.warning("No artifacts found.")
        return None
    # Pick the newest artifact by its creation timestamp rather than relying
    # on the order of the API response. ISO 8601 UTC timestamps compare
    # correctly as strings; when timestamps are missing or tied, max() keeps
    # the first element, i.e. the previous behavior.
    artifact = max(candidates, key=lambda item: item.get("created_at") or "")
    if artifact["expired"]:
        logger.error("The most recent boost-dep-artifact is expired.")
        return None
    return self.client.get_artifact_content(artifact["archive_download_url"])


def update_library_version_dependencies(self, owner="", clean=False):
    """Update LibraryVersion dependencies M2M via a github action artifact.

    owner: The repo owner. Defaults to `boostorg` in self.client.
    clean: Clear the M2M before adding dependencies.

    Does nothing when no artifact content could be fetched. Otherwise every
    (library_version, dependencies) pair yielded by
    ``parse_boostdep_artifact`` is written to the M2M and the totals are
    logged once at the end.
    """
    content = self.fetch_most_recent_boost_dep_artifact_content(owner=owner)
    if not content:
        return

    saved_dependencies = 0
    saved_library_versions = 0
    for library_version, dependencies in parse_boostdep_artifact(content):
        if clean:
            # Replace whatever is stored with exactly the parsed set.
            library_version.dependencies.set(dependencies, clear=True)
        else:
            # Additive: existing relations are kept.
            library_version.dependencies.add(*dependencies)
        saved_library_versions += 1
        saved_dependencies += len(dependencies)
    logger.info(
        "update_library_version_dependencies finished",
        saved_dependencies=saved_dependencies,
        saved_library_versions=saved_library_versions,
    )
help="The repo owner of the artifact to be parsed for dependencies.", +) +@click.option("--clean", is_flag=True, help="Remove dependencies before adding them.") +def command(token=None, owner="", clean=False): + click.secho( + "Attempting to update library version dependencies", + fg="green", + ) + updater = LibraryUpdater(token=token) + updater.update_library_version_dependencies(owner=owner, clean=clean) + click.secho("Finished importing libraries.", fg="green") diff --git a/libraries/migrations/0027_libraryversion_dependencies.py b/libraries/migrations/0027_libraryversion_dependencies.py new file mode 100644 index 00000000..ce3343b3 --- /dev/null +++ b/libraries/migrations/0027_libraryversion_dependencies.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.16 on 2024-12-05 07:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("libraries", "0026_libraryversion_cpp_standard_minimum_and_description"), + ] + + operations = [ + migrations.AddField( + model_name="libraryversion", + name="dependencies", + field=models.ManyToManyField( + blank=True, related_name="dependents", to="libraries.library" + ), + ), + ] diff --git a/libraries/models.py b/libraries/models.py index 9258ce5a..c04c8b8e 100644 --- a/libraries/models.py +++ b/libraries/models.py @@ -378,6 +378,12 @@ class LibraryVersion(models.Model): deletions = models.IntegerField(default=0) files_changed = models.IntegerField(default=0) cpp_standard_minimum = models.CharField(max_length=50, blank=True, null=True) + dependencies = models.ManyToManyField( + "libraries.Library", + symmetrical=False, + related_name="dependents", + blank=True, + ) def __str__(self): return f"{self.library.name} ({self.version.name})" diff --git a/libraries/tasks.py b/libraries/tasks.py index d49f99de..9762d917 100644 --- a/libraries/tasks.py +++ b/libraries/tasks.py @@ -227,3 +227,11 @@ def generate_library_report(params): """Generate a library report asynchronously and save it 
@app.task
def update_library_version_dependencies(token=None):
    """Run the `update_library_version_dependencies` management command.

    token: Optional GitHub API token; when truthy it is forwarded to the
    command as `--token <token>`, otherwise the command runs without it.
    """
    token_args = ["--token", token] if token else []
    call_command("update_library_version_dependencies", *token_args)
def parse_boostdep_artifact(content: str):
    """Parse and return a generator which yields libraries and their dependencies.

    - `content` is a string of the artifact content given by the dependency_report
      GH action.
    - Iterate through the file and yield a tuple of
      (library_version: LibraryVersion, dependencies: list[Library])
    - Some library keys in the output do not match the names in our database exactly,
      so transform names when necessary
    - The boost database may not contain every library version found in this file,
      if we find a definition of dependencies for a library version we are not
      tracking, ignore it and continue to the next line.
    - Likewise, a dependency whose key is not present in the Library table is
      skipped (and reported once at the end) instead of aborting the whole
      import with a KeyError.
    - Blank lines are ignored.
    - example content can be found in
      libraries/tests/fixtures.py -> github_action_boostdep_output_artifact

    """
    from libraries.models import Library, LibraryVersion

    libraries = {x.key: x for x in Library.objects.all()}
    # these libraries do not exist in the DB, ignore them.
    ignore_libraries = frozenset({"disjoint_sets", "tr1"})
    # dependency keys seen in the report that we have no Library row for
    unknown_dependencies = set()

    def fix_library_key(name):
        """Transforms library key in boostdep report to match our library keys"""
        if name == "logic":
            return "logic/tribool"
        return name.replace("~", "/")

    def parse_line(line: str):
        """Split a `library -> dep dep ...` line into (key, [Library, ...])."""
        parts = line.split("->")
        library_key = fix_library_key(parts[0].strip())
        dependencies = []
        if len(parts) == 2:
            for raw_name in parts[1].split():
                dependency_key = fix_library_key(raw_name)
                if dependency_key in ignore_libraries:
                    continue
                dependency = libraries.get(dependency_key)
                if dependency is None:
                    # Not tracked in our DB: remember it for the summary log
                    # and keep going rather than raising.
                    unknown_dependencies.add(dependency_key)
                    continue
                dependencies.append(dependency)
        return library_key, dependencies

    library_versions = {}
    version_name = ""
    skipped_library_versions = 0
    for line in content.splitlines():
        if not line.strip():
            # nothing to parse; do not count it as a skipped library version
            continue
        # each section is headed with 'Dependencies for version boost-x.x.0'
        if line.startswith("Dependencies for version"):
            version_name = line.split()[-1]
            # one query per section: every tracked library for this version
            library_versions = {
                x.library.key: x
                for x in LibraryVersion.objects.filter(
                    version__name=version_name
                ).select_related("library")
            }
            continue
        library_key, dependencies = parse_line(line)
        if library_key in ignore_libraries:
            continue
        library_version = library_versions.get(library_key, None)
        if not library_version:
            skipped_library_versions += 1
            logger.info(
                f"LibraryVersion with {library_key=} {version_name=} not found."
            )
            continue
        yield library_version, dependencies
    if skipped_library_versions:
        logger.info(
            "Some library versions were skipped during artifact parsing.",
            skipped_library_versions=skipped_library_versions,
        )
    if unknown_dependencies:
        logger.warning(
            "Some dependencies were skipped because no matching Library exists.",
            unknown_dependencies=sorted(unknown_dependencies),
        )