Add functionality to track LibraryVersion dependencies (#1524)

This commit is contained in:
Brian Perrett
2024-12-16 11:28:33 -08:00
committed by GitHub
parent 223b167032
commit 2981976b80
14 changed files with 406 additions and 10 deletions

49
.github/workflows/dependency_report.yml vendored Normal file
View File

@@ -0,0 +1,49 @@
# Builds boostdep from the boostorg/boost super-repo, runs get_deps.sh against
# every boost-x.x.0 tag and uploads the combined report as the
# "boost-dep-artifact" artifact.
name: Dependency-Report-Artifact

on:
  push:
    branches: [ master ]
  workflow_dispatch: # Allows manual triggering

jobs:
  create-boost-dep-artifact:
    runs-on: ubuntu-latest
    steps:
      # This repository is never checked out (only boostorg/boost is), so the
      # helper script is downloaded for the exact commit that triggered the run.
      - name: fetch get_deps script
        run: |
          echo "fetching get_deps.sh with curl"
          # --fail: abort on HTTP errors instead of saving an error page as the script.
          curl --fail --silent --show-error --location "https://github.com/${GITHUB_REPOSITORY}/blob/${GITHUB_SHA}/.github/workflows/scripts/get_deps.sh?raw=true" -o ./get_deps.sh
          echo "allow execution"
          chmod +x ./get_deps.sh
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # fetch all history for all branches and tags
          repository: boostorg/boost
          path: boost
          submodules: true
          fetch-tags: true
      - name: build boostdep
        run: |
          cd "$GITHUB_WORKSPACE/boost"
          echo "updating submodules"
          git submodule update --init --force
          echo "running bootstrap.sh"
          ./bootstrap.sh
          echo "running build"
          ./b2 tools/boostdep/build
      - name: get_deps
        run: |
          cd "$GITHUB_WORKSPACE/boost"
          echo "Running $GITHUB_WORKSPACE/get_deps.sh from $PWD"
          "$GITHUB_WORKSPACE/get_deps.sh" "$GITHUB_WORKSPACE/output.txt"
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: boost-dep-artifact
          path: output.txt
          retention-days: 90

30
.github/workflows/scripts/get_deps.sh vendored Normal file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Expected to be run from within the boost super-repo.
# Loops through all the boost-x.x.0 tags and runs
# `dist/bin/boostdep --list-dependencies --track-sources` on each of them.
# The output is appended to the file given as the first command-line argument,
# with a "Dependencies for version <tag>" header line before each tag's report.
#
# Ex: get_deps.sh /tmp/output.txt

# Exit on any error
set -e
# Exit on undefined variable (also aborts when the output file argument is missing)
set -u
# Print commands as they execute (debug mode)
set -x
# Exit on pipe failures
set -o pipefail

output_file=$1
echo "Creating $output_file"
# Create or truncate the output file. Quoted so paths containing spaces work.
: > "$output_file"

# Loop through all tags of the form "boost-x.x.0"
for tag in $(git tag | grep -E 'boost-[0-9]+\.[0-9]+\.0$'); do
    git checkout "$tag" --force
    git submodule update --init --force
    # -d recurses through directories, -ff (2 f's) deletes files in submodules,
    # -e dist keeps the boostdep executable that was built before this script ran.
    git clean -dff -e dist
    echo "Dependencies for version $tag" | tee -a "$output_file"
    # A boostdep failure for one tag must not abort the whole run (set -e + pipefail),
    # hence the explicit `|| continue`.
    dist/bin/boostdep --list-dependencies --track-sources | tee -a "$output_file" || continue
done

View File

@@ -33,6 +33,12 @@ def setup_periodic_tasks(sender, **kwargs):
app.signature("libraries.tasks.update_libraries"),
)
# Update libraryversion dependencies. Executes daily at 8:05 AM
sender.add_periodic_task(
crontab(hour=8, minute=5),
app.signature("libraries.tasks.update_library_version_dependencies"),
)
# Clear the static content database cache. Executes daily at 4:05 AM.
sender.add_periodic_task(
crontab(hour=4, minute=5),

View File

@@ -6,6 +6,8 @@ from datetime import datetime
from socket import gaierror
import time
from urllib.error import URLError
from io import BytesIO
from zipfile import ZipFile
import requests
import structlog
@@ -36,7 +38,8 @@ class GithubAPIClient:
:param ref: str, the Git reference
:param repo_slug: str, the repository slug
"""
self.api = self.initialize_api(token=token)
self.token = token or os.environ.get("GITHUB_TOKEN", None)
self.api = self.initialize_api()
self.owner = owner
self.ref = ref
self.repo_slug = repo_slug
@@ -60,15 +63,13 @@ class GithubAPIClient:
"more",
]
def initialize_api(self, token=None) -> GhApi:
def initialize_api(self) -> GhApi:
"""
Initialize the GitHub API with the token from the environment variable.
:return: GhApi, the GitHub API
"""
if token is None:
token = os.environ.get("GITHUB_TOKEN", None)
return GhApi(token=token)
return GhApi(token=self.token)
def with_retry(self, fn, retry_count=5):
count = 0
@@ -491,6 +492,44 @@ class GithubAPIClient:
"""Return the response from GitHub's /users/{username}/"""
return self.api.users.get_by_username(username=username)
def get_artifacts(self, owner="", repo_slug="", name=None):
    """Return the GitHub Actions artifacts listing for a repository.

    :param owner: str, the repository owner; falls back to ``self.owner``
    :param repo_slug: str, the repository slug; falls back to ``self.repo_slug``
    :param name: str, if given, only artifacts with this name are requested
    :return: the decoded JSON body of the response, or None when the request
        did not return HTTP 200 (the status code is logged)
    """
    owner = owner or self.owner
    repo_slug = repo_slug or self.repo_slug
    url = f"https://api.github.com/repos/{owner}/{repo_slug}/actions/artifacts"
    params = {"name": name} if name else {}
    headers = {"accept": "application/vnd.github+json"}
    # Authenticate when a token is configured, like get_artifact_content does,
    # so the listing is not subject to the anonymous rate limit.
    if self.token:
        headers["Authorization"] = f"Bearer {self.token}"
    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        logger.error(
            "Error while fetching artifacts.", status_code=response.status_code
        )
        return None
    return response.json()
def get_artifact_content(self, url):
    """Download an artifact archive and return the text of its first file.

    :param url: str, the archive download URL of the artifact
    :return: str, the decoded content of the first member of the zip archive,
        or an empty string when the download did not return HTTP 200 or the
        archive contains no files
    """
    headers = {"accept": "application/vnd.github+json"}
    # Only send credentials when a token is configured; a literal
    # "Bearer None" header is never a valid credential.
    if self.token:
        headers["Authorization"] = f"Bearer {self.token}"
    resp = requests.get(url, headers=headers)
    if resp.status_code != 200:
        logger.error(
            "Error while fetching artifact file.", status_code=resp.status_code
        )
        return ""
    # The context manager closes the archive even if reading or decoding fails.
    with ZipFile(BytesIO(resp.content)) as archive:
        members = archive.infolist()
        if not members:
            logger.error("Artifact archive contains no files.")
            return ""
        with archive.open(members[0]) as f:
            return f.read().decode()
class GithubDataParser:
def get_commits_per_month(self, commits: list[dict]):

View File

@@ -13,6 +13,8 @@
- [`import_commits`](#import_commits)
- [`update_issues`](#update_issues)
- [`import_beta_release`](#import_beta_release)
- [`sync_mailinglist_stats`](#sync_mailinglist_stats)
- [`update_library_version_dependencies`](#update_library_version_dependencies)
## `boost_setup`
@@ -279,3 +281,22 @@ If both the `--release` and the `--library-name` are passed, the command will lo
| Options | Format | Description |
|----------------------|--------|--------------------------------------------------------------|
| `--clean` | bool | If passed, all existing beta EmailData records will be deleted before running the sync. |
## `update_library_version_dependencies`
**Purpose**: Download the boostdep report text file that the `Dependency-Report-Artifact` GitHub Actions workflow uploads as an artifact, and use it to update the dependencies of `LibraryVersion` models.
**Example**
```bash
./manage.py update_library_version_dependencies
```
**Options**
| Options | Format | Description |
|----------------------|--------|--------------------------------------------------------------|
| `--token` | string | Pass a GitHub API token. If not passed, will use the value in `settings.GITHUB_TOKEN`. |
| `--clean` | bool | If passed, existing dependencies in the M2M will be cleared before reinserting them. |
| `--owner` | string | The repo owner. Defaults to "boostorg", which is correct in most cases but can be useful to specify for testing. |

View File

@@ -337,7 +337,7 @@ class LibraryVersionAdmin(admin.ModelAdmin):
ordering = ["library__name", "-version__name"]
search_fields = ["library__name", "version__name"]
change_list_template = "admin/libraryversion_change_list.html"
autocomplete_fields = ["authors", "maintainers"]
autocomplete_fields = ["authors", "maintainers", "dependencies"]
def get_urls(self):
urls = super().get_urls()

View File

@@ -30,7 +30,7 @@ from .models import (
)
from core.githubhelper import GithubAPIClient, GithubDataParser
from .utils import generate_fake_email, parse_date
from .utils import generate_fake_email, parse_boostdep_artifact, parse_date
logger = structlog.get_logger()
@@ -172,8 +172,7 @@ class LibraryUpdater:
"""
def __init__(self, client=None, token=None):
self.client = client or GithubAPIClient()
self.api = self.client.initialize_api(token=token)
self.client = client or GithubAPIClient(token=token)
self.parser = GithubDataParser()
self.logger = structlog.get_logger()
@@ -587,3 +586,46 @@ class LibraryUpdater:
if gh_author["html_url"]:
author.github_profile_url = gh_author["html_url"]
author.save(update_fields=["avatar_url", "github_profile_url"])
def fetch_most_recent_boost_dep_artifact_content(self, owner=""):
    """Return the text content of the newest "boost-dep-artifact" artifact.

    owner: The repo owner passed through to the client; the repository slug
        is always "website-v2".

    Returns None when no artifact is listed or when the newest one is
    expired; otherwise returns whatever the client's get_artifact_content
    returns for the artifact's archive download URL.
    """
    # get artifacts with the name "boost-dep-artifact"
    artifacts = self.client.get_artifacts(
        owner=owner,
        repo_slug="website-v2",
        name="boost-dep-artifact",
    )
    if not artifacts or not artifacts.get("artifacts", None):
        logger.warning("No artifacts found.")
        return None

    # Pick the newest artifact explicitly instead of relying on the order of
    # the API response. ISO-8601 UTC timestamps sort correctly as strings, and
    # max() keeps the first entry when no usable timestamp is present.
    artifact = max(
        artifacts["artifacts"],
        key=lambda item: item.get("created_at") or "",
    )
    if artifact["expired"]:
        logger.error("The most recent boost-dep-artifact is expired.")
        return None
    return self.client.get_artifact_content(artifact["archive_download_url"])
def update_library_version_dependencies(self, owner="", clean=False):
    """Refresh the LibraryVersion.dependencies M2M from the boostdep artifact.

    owner: The repo owner. Defaults to `boostorg` in self.client.
    clean: Replace the existing M2M entries instead of only adding to them.

    Does nothing when no artifact content could be fetched; otherwise logs
    how many library versions and dependencies were processed.
    """
    artifact_text = self.fetch_most_recent_boost_dep_artifact_content(owner=owner)
    if not artifact_text:
        return

    dependency_total = 0
    library_version_total = 0
    for lib_version, deps in parse_boostdep_artifact(artifact_text):
        if not clean:
            lib_version.dependencies.add(*deps)
        else:
            lib_version.dependencies.set(deps, clear=True)
        library_version_total += 1
        dependency_total += len(deps)

    logger.info(
        "update_library_version_dependencies finished",
        saved_dependencies=dependency_total,
        saved_library_versions=library_version_total,
    )

View File

@@ -0,0 +1,21 @@
import djclick as click
from libraries.github import LibraryUpdater
@click.command()
@click.option("--token", is_flag=False, help="Github API token")
@click.option(
    "--owner",
    is_flag=False,
    help="The repo owner of the artifact to be parsed for dependencies.",
)
@click.option("--clean", is_flag=True, help="Remove dependencies before adding them.")
def command(token=None, owner="", clean=False):
    """Update LibraryVersion dependencies from the boostdep GitHub artifact.

    Builds a LibraryUpdater with the given token and delegates to its
    update_library_version_dependencies method, passing owner and clean through.
    """
    click.secho(
        "Attempting to update library version dependencies",
        fg="green",
    )
    updater = LibraryUpdater(token=token)
    updater.update_library_version_dependencies(owner=owner, clean=clean)
    # The previous closing message ("Finished importing libraries.") was copied
    # from another command and did not describe what this command does.
    click.secho("Finished updating library version dependencies.", fg="green")

View File

@@ -0,0 +1,20 @@
# Generated by Django 4.2.16 on 2024-12-05 07:06
from django.db import migrations, models
# Schema migration that adds the `dependencies` many-to-many field to the
# `libraryversion` model, pointing at `libraries.library`.
class Migration(migrations.Migration):
# Migration ordering: this must be applied after migration 0026 of the
# `libraries` app. (Unrelated to the model field of the same name below.)
dependencies = [
("libraries", "0026_libraryversion_cpp_standard_minimum_and_description"),
]
operations = [
# New optional (blank=True) M2M; the reverse accessor on the target
# model is named `dependents`.
migrations.AddField(
model_name="libraryversion",
name="dependencies",
field=models.ManyToManyField(
blank=True, related_name="dependents", to="libraries.library"
),
),
]

View File

@@ -378,6 +378,12 @@ class LibraryVersion(models.Model):
deletions = models.IntegerField(default=0)
files_changed = models.IntegerField(default=0)
cpp_standard_minimum = models.CharField(max_length=50, blank=True, null=True)
dependencies = models.ManyToManyField(
"libraries.Library",
symmetrical=False,
related_name="dependents",
blank=True,
)
def __str__(self):
return f"{self.library.name} ({self.version.name})"

View File

@@ -227,3 +227,11 @@ def generate_library_report(params):
"""Generate a library report asynchronously and save it in RenderedContent."""
form = CreateReportFullForm(params)
form.cache_html()
@app.task
def update_library_version_dependencies(token=None):
    """Run the `update_library_version_dependencies` management command.

    `--token <token>` is forwarded only when a truthy token is supplied.
    """
    token_args = ["--token", token] if token else []
    call_command("update_library_version_dependencies", *token_args)

View File

@@ -1,6 +1,7 @@
import pytest
from fastcore.xtras import dict2obj
from model_bakery import baker
from textwrap import dedent
@pytest.fixture
@@ -226,3 +227,22 @@ def github_library():
"maintainers": ["Tester Testerson <tester -at- example.com>"],
"cxxstd": "03",
}
@pytest.fixture
def github_action_boostdep_output_artifact():
"""
A shortened example of the content in the artifact output by the
dependency_report.yml GitHub Actions workflow.

Each section starts with a "Dependencies for version <tag>" header and is
followed by zero or more "<library> -> <dependency> ..." lines. A section
may be empty, and a library may be listed with no dependencies.
"""
return dedent(
"""\
Dependencies for version boost-1.33.0
Dependencies for version boost-1.34.0
Dependencies for version boost-1.35.0
algorithm -> concept_check config detail logic numeric~conversion
Dependencies for version boost-1.85.0
algorithm -> array assert bind concept_check config core
numeric~conversion -> array
callable_traits ->"""
)

View File

@@ -6,7 +6,7 @@ from model_bakery import baker
from libraries.github import LibraryUpdater
from core.githubhelper import GithubAPIClient
from libraries.models import Category, Issue, Library, PullRequest
from libraries.models import Category, Issue, Library, LibraryVersion, PullRequest
@pytest.fixture
@@ -326,3 +326,63 @@ def test_update_prs_existing(
# Test that the existing PR updated
pull.refresh_from_db()
assert pull.title == existing_pr_data.title
def test_parse_boostdep_artifact(
    github_action_boostdep_output_artifact, library_updater
):
    """Parsing the GH artifact text must populate LibraryVersion.dependencies."""
    # (library key, version name) pairs that exist in the database.
    tracked_library_versions = [
        ("algorithm", "boost-1.35.0"),
        ("callable_traits", "boost-1.85.0"),
        ("algorithm", "boost-1.85.0"),
        ("numeric/conversion", "boost-1.85.0"),
    ]
    for library_key, version_name in tracked_library_versions:
        baker.make(
            "libraries.LibraryVersion",
            library__key=library_key,
            version__name=version_name,
        )

    # Every library referenced as a dependency in the fixture, using the keys
    # the parser maps the boostdep names to.
    dependency_keys = (
        "concept_check",
        "config",
        "detail",
        "array",
        "assert",
        "bind",
        "core",
        "logic/tribool",
        "numeric/conversion",
    )
    for dependency_key in dependency_keys:
        baker.make("libraries.Library", key=dependency_key)

    library_updater.fetch_most_recent_boost_dep_artifact_content = MagicMock(
        return_value=github_action_boostdep_output_artifact
    )
    library_updater.update_library_version_dependencies()

    expected_counts = [
        ("algorithm", "boost-1.35.0", 5),
        ("algorithm", "boost-1.85.0", 6),
        # callable traits is in the file but has no dependencies
        ("callable_traits", "boost-1.85.0", 0),
        ("numeric/conversion", "boost-1.85.0", 1),
    ]
    for library_key, version_name, expected in expected_counts:
        lv = LibraryVersion.objects.get(
            library__key=library_key, version__name=version_name
        )
        assert lv.dependencies.count() == expected

View File

@@ -213,3 +213,77 @@ def legacy_path_transform(content_path):
if content_path and content_path.startswith(LEGACY_LATEST_RELEASE_URL_PATH_STR):
content_path = re.sub(r"([a-zA-Z0-9\.]+)/(\S+)", r"latest/\2", content_path)
return content_path
def parse_boostdep_artifact(content: str):
    """Parse and return a generator which yields libraries and their dependencies.

    - `content` is a string of the artifact content given by the dependency_report
      GH action.
    - Iterate through the file and yield a tuple of
      (library_version: LibraryVersion, dependencies: list[Library])
    - Some library keys in the output do not match the names in our database exactly,
      so transform names when necessary
    - The boost database may not contain every library version found in this file,
      if we find a definition of dependencies for a library version we are not
      tracking, ignore it and continue to the next line.
    - Blank lines are ignored.
    - A dependency whose key is not in the database is left out of the yielded
      list and reported in a warning once parsing finishes, so that a single
      unknown name does not abort the run after some rows were already updated.
    - example content can be found in
      libraries/tests/fixtures.py -> github_action_boostdep_output_artifact
    """
    from libraries.models import Library, LibraryVersion

    libraries = {x.key: x for x in Library.objects.all()}
    # these libraries do not exist in the DB, ignore them.
    ignore_libraries = frozenset({"disjoint_sets", "tr1"})
    # dependency keys seen in the report that have no Library row
    unknown_dependencies = set()

    def fix_library_key(name):
        """Transforms library key in boostdep report to match our library keys"""
        if name == "logic":
            return "logic/tribool"
        return name.replace("~", "/")

    def parse_line(line: str):
        """Split a `library -> dep dep ...` line into (library_key, [Library])."""
        parts = line.split("->")
        library_key = fix_library_key(parts[0].strip())
        dependencies = []
        # Anything other than exactly one "->" is treated as "no dependencies".
        if len(parts) == 2:
            for raw_name in parts[1].split():
                dependency_key = fix_library_key(raw_name)
                if dependency_key in ignore_libraries:
                    continue
                dependency = libraries.get(dependency_key)
                if dependency is None:
                    unknown_dependencies.add(dependency_key)
                    continue
                dependencies.append(dependency)
        return library_key, dependencies

    library_versions = {}
    version_name = ""
    skipped_library_versions = 0
    for line in content.splitlines():
        if not line.strip():
            # An empty line is not a library definition; do not count it as skipped.
            continue
        # each section is headed with 'Dependencies for version boost-x.x.0'
        if line.startswith("Dependencies for version"):
            version_name = line.split()[-1]
            library_versions = {
                x.library.key: x
                for x in LibraryVersion.objects.filter(
                    version__name=version_name
                ).select_related("library")
            }
            continue

        library_key, dependencies = parse_line(line)
        if library_key in ignore_libraries:
            continue
        library_version = library_versions.get(library_key, None)
        if not library_version:
            skipped_library_versions += 1
            logger.info(
                f"LibraryVersion with {library_key=} {version_name=} not found."
            )
            continue
        yield library_version, dependencies

    if skipped_library_versions:
        logger.info(
            "Some library versions were skipped during artifact parsing.",
            skipped_library_versions=skipped_library_versions,
        )
    if unknown_dependencies:
        logger.warning(
            "Some dependencies were not found in the database and were ignored.",
            unknown_dependencies=sorted(unknown_dependencies),
        )