Finished GitHub parsing code with exclusions

This commit is contained in:
Frank Wiles
2022-07-24 14:18:24 -05:00
parent 73e80d785c
commit ea833085e1
7 changed files with 98 additions and 21 deletions

View File

@@ -3,7 +3,7 @@ version: "3.3"
services:
db:
image: postgres:12.0 /_/postgres/12.0
image: postgres:12.0
environment:
- "POSTGRES_HOST_AUTH_METHOD=trust"
networks:

View File

@@ -6,7 +6,7 @@ import requests
import structlog
from ghapi.all import GhApi, paged
from .models import Library
from .models import Library, Category
logger = structlog.get_logger()
@@ -79,7 +79,6 @@ def parse_submodules(content):
url_re = re.compile(r"^\s*url\s*\=\s*\.\.\/(.*)\.git\s*$")
for line in content.split("\n"):
print(line)
sub_m = submodule_re.match(line)
if sub_m:
current_submodule = {"module": sub_m.group(1)}
@@ -89,7 +88,6 @@ def parse_submodules(content):
if url_m:
name = url_m.group(1)
current_submodule["url"] = name
print(f"FOUND: {current_submodule}")
modules.append(current_submodule)
current_submodule = None
@@ -105,6 +103,17 @@ class LibraryUpdater:
def __init__(self, owner="boostorg"):
    """Initialise the updater for the repositories owned by ``owner``.

    Stores the handle returned by ``get_api()``, the owner name, a
    structlog logger, and the list of submodule names to skip.
    """
    self.api = get_api()
    self.owner = owner
    self.logger = structlog.get_logger()

    # Submodules that are not actual Boost libraries; these are skipped.
    self.skip_modules = [
        "inspect", "boostbook", "bcp",
        "build", "quickbook", "litre",
    ]
def get_ref(self, repo, ref):
"""Get a particular ref of a particular repo"""
@@ -115,7 +124,11 @@ class LibraryUpdater:
return self.get_ref(repo="boost", ref="heads/master")
def get_library_list(self):
"""Determine our list of libraries"""
"""
Determine our list of libraries from .gitmodules and sub-repo
libraries.json files
"""
# Find our latest .gitmodules
r = self.get_boost_ref()
tree_sha = r["object"]["sha"]
@@ -132,7 +145,51 @@ class LibraryUpdater:
gitmodules = base64.b64decode(f["content"])
break
modules = parse_submodules(gitmodules)
modules = parse_submodules(gitmodules.decode("utf-8"))
# Parse the modules into libraries. Most libraries are individual
# repositories, but a few such as "system", "functional", and others
# contain multiple libraries
libraries = []
for m in modules:
name = m["module"]
if name in self.skip_modules:
self.logger.info("skipping_library", skipped_library=name)
continue
meta = self.get_library_metadata(repo=name)
github_url = f"https://github.com/boostorg/{name}/"
if type(meta) is list:
for sublibrary in meta:
libraries.append(
{
"name": name,
"github_url": github_url,
"authors": sublibrary["authors"],
"description": sublibrary["description"],
"category": sublibrary["category"],
"authors": sublibrary["authors"],
"maintainers": sublibrary.get("maintainers", []),
"cxxstd": sublibrary.get("cxxstd"),
}
)
elif type(meta) is dict:
libraries.append(
{
"name": name,
"github_url": github_url,
"authors": meta["authors"],
"description": meta["description"],
"category": meta["category"],
"authors": meta["authors"],
"maintainers": meta.get("maintainers", []),
"cxxstd": meta.get("cxxstd"),
}
)
return libraries
def get_library_metadata(self, repo):
"""Retrieve library metadata from 'meta/libraries.json'"""
@@ -141,14 +198,14 @@ class LibraryUpdater:
r = requests.get(url)
return r.json()
except Exception as e:
logger.exception("get_library_metadata_failed", repo=repo, url=url)
self.logger.exception("get_library_metadata_failed", repo=repo, url=url)
return None
def update_libraries(self):
"""Update all libraries and their metadata.

Fetches the library list with get_library_list(), logs how many
entries it contains, then calls update_library() on each entry.
"""
libs = self.get_library_list()
# NOTE(review): the next two lines appear to be the removed and added sides
# of the same diff hunk (module-level logger vs. self.logger) -- confirm
# which one belongs in the live file.
logger.info("update_all_libraries_metadata", library_count=len(libs))
self.logger.info("update_all_libraries_metadata", library_count=len(libs))
for lib in libs:
self.update_library(lib)
@@ -162,21 +219,18 @@ class LibraryUpdater:
def update_library(self, lib):
"""Update an individual library"""
logger = logger.bind(lib=lib)
logger = self.logger.bind(lib=lib)
try:
obj, created = Library.objects_get_or_create(name=lib)
logger = logger.bind(created=created)
obj, created = Library.objects.get_or_create(name=lib["name"])
obj.github_url = lib["github_url"]
obj.description = lib["description"]
meta = self.get_library_metadata(lib)
if meta is not None:
logger = logger.bind(meta=meta)
obj.description = meta["description"]
# Update categories
self.update_categories(obj, categories=lib["category"])
# Update categories
self.update_categories(obj, categories=meta["category"])
# Save any changes
obj.save()
# Save any changes
logger = logger.bind(obj_created=created)
obj.save()
logger.info("library_udpated")

View File

View File

@@ -0,0 +1,9 @@
import djclick as click
from libraries.github import LibraryUpdater
@click.command()
def command():
    """Update all libraries using a default-constructed LibraryUpdater."""
    # `updater` replaces the ambiguous single-letter name `l`, which is
    # easily confused with `1` and `I`.
    updater = LibraryUpdater()
    updater.update_libraries()

View File

@@ -11,6 +11,7 @@ gevent
gunicorn
psycopg2-binary
whitenoise
django-click
# Logging
django-tracer

View File

@@ -36,6 +36,10 @@ bumpversion==0.6.0
# via -r ./requirements.in
celery==5.0.5
# via -r ./requirements.in
certifi==2022.6.15
# via requests
charset-normalizer==2.1.0
# via requests
click==7.1.2
# via
# black
@@ -43,6 +47,7 @@ click==7.1.2
# click-didyoumean
# click-plugins
# click-repl
# django-click
# pip-tools
click-didyoumean==0.0.3
# via celery
@@ -74,6 +79,8 @@ django-bakery==0.12.7
# via -r ./requirements.in
django-cache-url==3.2.3
# via environs
django-click==2.3.0
# via -r ./requirements.in
django-db-geventpool==4.0.0
# via -r ./requirements.in
django-extensions==3.1.3
@@ -122,6 +129,8 @@ greenlet==1.1.0
# via gevent
gunicorn==20.1.0
# via -r ./requirements.in
idna==3.3
# via requests
iniconfig==1.1.1
# via pytest
ipython==8.4.0
@@ -225,6 +234,8 @@ redis==3.5.3
# django-redis
regex==2021.4.4
# via black
requests==2.28.1
# via -r ./requirements.in
s3transfer==0.4.2
# via boto3
six==1.16.0
@@ -253,7 +264,9 @@ traitlets==5.2.1.post0
# ipython
# matplotlib-inline
urllib3==1.26.4
# via botocore
# via
# botocore
# requests
vine==5.0.0
# via
# amqp