diff --git a/docker-compose.yml b/docker-compose.yml index 9266d071..435a9f56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: "3.3" services: db: - image: postgres:12.0 /_/postgres/12.0 + image: postgres:12.0 environment: - "POSTGRES_HOST_AUTH_METHOD=trust" networks: diff --git a/libraries/github.py b/libraries/github.py index 0739f776..c6b8c9d0 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -6,7 +6,7 @@ import requests import structlog from ghapi.all import GhApi, paged -from .models import Library +from .models import Library, Category logger = structlog.get_logger() @@ -79,7 +79,6 @@ def parse_submodules(content): url_re = re.compile(r"^\s*url\s*\=\s*\.\.\/(.*)\.git\s*$") for line in content.split("\n"): - print(line) sub_m = submodule_re.match(line) if sub_m: current_submodule = {"module": sub_m.group(1)} @@ -89,7 +88,6 @@ def parse_submodules(content): if url_m: name = url_m.group(1) current_submodule["url"] = name - print(f"FOUND: {current_submodule}") modules.append(current_submodule) current_submodule = None @@ -105,6 +103,17 @@ class LibraryUpdater: def __init__(self, owner="boostorg"): self.api = get_api() self.owner = owner + self.logger = structlog.get_logger() + + # Modules we need to skip as they are not really Boost Libraries + self.skip_modules = [ + "inspect", + "boostbook", + "bcp", + "build", + "quickbook", + "litre", + ] def get_ref(self, repo, ref): """Get a particular ref of a particular repo""" @@ -115,7 +124,11 @@ class LibraryUpdater: return self.get_ref(repo="boost", ref="heads/master") def get_library_list(self): - """Determine our list of libraries""" + """ + Determine our list of libraries from .gitmodules and sub-repo + libraries.json files + """ + # Find our latest .gitmodules r = self.get_boost_ref() tree_sha = r["object"]["sha"] @@ -132,7 +145,51 @@ class LibraryUpdater: gitmodules = base64.b64decode(f["content"]) break - modules = parse_submodules(gitmodules) + modules = parse_submodules(gitmodules.decode("utf-8")) + + # Parse the modules into libraries. Most libraries are individual + # repositories, but a few such as "system", "functional", and others + # contain multiple libraries + libraries = [] + for m in modules: + name = m["module"] + + if name in self.skip_modules: + self.logger.info("skipping_library", skipped_library=name) + continue + + meta = self.get_library_metadata(repo=name) + github_url = f"https://github.com/boostorg/{name}/" + if type(meta) is list: + for sublibrary in meta: + libraries.append( + { + "name": name, + "github_url": github_url, + "authors": sublibrary["authors"], + "description": sublibrary["description"], + "category": sublibrary["category"], + "authors": sublibrary["authors"], + "maintainers": sublibrary.get("maintainers", []), + "cxxstd": sublibrary.get("cxxstd"), + } + ) + + elif type(meta) is dict: + libraries.append( + { + "name": name, + "github_url": github_url, + "authors": meta["authors"], + "description": meta["description"], + "category": meta["category"], + "authors": meta["authors"], + "maintainers": meta.get("maintainers", []), + "cxxstd": meta.get("cxxstd"), + } + ) + + return libraries def get_library_metadata(self, repo): """Retrieve library metadata from 'meta/libraries.json'""" @@ -141,14 +198,14 @@ class LibraryUpdater: r = requests.get(url) return r.json() except Exception as e: - logger.exception("get_library_metadata_failed", repo=repo, url=url) + self.logger.exception("get_library_metadata_failed", repo=repo, url=url) return None def update_libraries(self): """Update all libraries and they metadata""" libs = self.get_library_list() - logger.info("update_all_libraries_metadata", library_count=len(libs)) + self.logger.info("update_all_libraries_metadata", library_count=len(libs)) for lib in libs: self.update_library(lib) @@ -162,21 +219,18 @@ class LibraryUpdater: def update_library(self, lib): """Update an individual library""" - logger = logger.bind(lib=lib) + logger = self.logger.bind(lib=lib) try: - obj, created = Library.objects_get_or_create(name=lib) - logger = logger.bind(created=created) + obj, created = Library.objects.get_or_create(name=lib["name"]) + obj.github_url = lib["github_url"] + obj.description = lib["description"] - meta = self.get_library_metadata(lib) - if meta is not None: - logger = logger.bind(meta=meta) - obj.description = meta["description"] + # Update categories + self.update_categories(obj, categories=lib["category"]) - # Update categories - self.update_categories(obj, categories=meta["category"]) - - # Save any changes - obj.save() + # Save any changes + logger = logger.bind(obj_created=created) + obj.save() logger.info("library_udpated") diff --git a/libraries/management/__init__.py b/libraries/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libraries/management/commands/__init__.py b/libraries/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libraries/management/commands/update_libraries.py b/libraries/management/commands/update_libraries.py new file mode 100644 index 00000000..78f21c3f --- /dev/null +++ b/libraries/management/commands/update_libraries.py @@ -0,0 +1,9 @@ +import djclick as click + +from libraries.github import LibraryUpdater + + +@click.command() +def command(): + l = LibraryUpdater() + l.update_libraries() diff --git a/requirements.in b/requirements.in index 15f58c5d..972b1216 100755 --- a/requirements.in +++ b/requirements.in @@ -11,6 +11,7 @@ gevent gunicorn psycopg2-binary whitenoise +django-click # Logging django-tracer diff --git a/requirements.txt b/requirements.txt index e0a6d094..6c168c7a 100755 --- a/requirements.txt +++ b/requirements.txt @@ -36,6 +36,10 @@ bumpversion==0.6.0 # via -r ./requirements.in celery==5.0.5 # via -r ./requirements.in +certifi==2022.6.15 + # via requests +charset-normalizer==2.1.0 + # via requests click==7.1.2 # via # black @@ -43,6 +47,7 @@ click==7.1.2 # click-didyoumean # click-plugins # click-repl + # django-click # pip-tools click-didyoumean==0.0.3 # via celery @@ -74,6 +79,8 @@ django-bakery==0.12.7 # via -r ./requirements.in django-cache-url==3.2.3 # via environs +django-click==2.3.0 + # via -r ./requirements.in django-db-geventpool==4.0.0 # via -r ./requirements.in django-extensions==3.1.3 @@ -122,6 +129,8 @@ greenlet==1.1.0 # via gevent gunicorn==20.1.0 # via -r ./requirements.in +idna==3.3 + # via requests iniconfig==1.1.1 # via pytest ipython==8.4.0 @@ -225,6 +234,8 @@ redis==3.5.3 # django-redis regex==2021.4.4 # via black +requests==2.28.1 + # via -r ./requirements.in s3transfer==0.4.2 # via boto3 six==1.16.0 @@ -253,7 +264,9 @@ traitlets==5.2.1.post0 # ipython # matplotlib-inline urllib3==1.26.4 - # via botocore + # via + # botocore + # requests vine==5.0.0 # via # amqp