mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Finished Github parsing code with exclusions
This commit is contained in:
@@ -3,7 +3,7 @@ version: "3.3"
|
||||
services:
|
||||
|
||||
db:
|
||||
image: postgres:12.0 /_/postgres/12.0
|
||||
image: postgres:12.0
|
||||
environment:
|
||||
- "POSTGRES_HOST_AUTH_METHOD=trust"
|
||||
networks:
|
||||
|
||||
@@ -6,7 +6,7 @@ import requests
|
||||
import structlog
|
||||
|
||||
from ghapi.all import GhApi, paged
|
||||
from .models import Library
|
||||
from .models import Library, Category
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@@ -79,7 +79,6 @@ def parse_submodules(content):
|
||||
url_re = re.compile(r"^\s*url\s*\=\s*\.\.\/(.*)\.git\s*$")
|
||||
|
||||
for line in content.split("\n"):
|
||||
print(line)
|
||||
sub_m = submodule_re.match(line)
|
||||
if sub_m:
|
||||
current_submodule = {"module": sub_m.group(1)}
|
||||
@@ -89,7 +88,6 @@ def parse_submodules(content):
|
||||
if url_m:
|
||||
name = url_m.group(1)
|
||||
current_submodule["url"] = name
|
||||
print(f"FOUND: {current_submodule}")
|
||||
modules.append(current_submodule)
|
||||
current_submodule = None
|
||||
|
||||
@@ -105,6 +103,17 @@ class LibraryUpdater:
|
||||
def __init__(self, owner="boostorg"):
    """Prepare the updater: API client, repository owner, logger, skip list.

    ``owner`` is stored as given and defaults to ``"boostorg"``.
    """
    self.owner = owner
    self.api = get_api()
    self.logger = structlog.get_logger()

    # Modules we need to skip as they are not really Boost Libraries
    self.skip_modules = ["inspect", "boostbook", "bcp", "build", "quickbook", "litre"]
|
||||
|
||||
def get_ref(self, repo, ref):
|
||||
"""Get a particular ref of a particular repo"""
|
||||
@@ -115,7 +124,11 @@ class LibraryUpdater:
|
||||
return self.get_ref(repo="boost", ref="heads/master")
|
||||
|
||||
def get_library_list(self):
|
||||
"""Determine our list of libraries"""
|
||||
"""
|
||||
Determine our list of libraries from .gitmodules and sub-repo
|
||||
libraries.json files
|
||||
"""
|
||||
# Find our latest .gitmodules
|
||||
r = self.get_boost_ref()
|
||||
tree_sha = r["object"]["sha"]
|
||||
|
||||
@@ -132,7 +145,51 @@ class LibraryUpdater:
|
||||
gitmodules = base64.b64decode(f["content"])
|
||||
break
|
||||
|
||||
modules = parse_submodules(gitmodules)
|
||||
modules = parse_submodules(gitmodules.decode("utf-8"))
|
||||
|
||||
# Parse the modules into libraries. Most libraries are individual
|
||||
# repositories, but a few such as "system", "functional", and others
|
||||
# contain multiple libraries
|
||||
libraries = []
|
||||
for m in modules:
|
||||
name = m["module"]
|
||||
|
||||
if name in self.skip_modules:
|
||||
self.logger.info("skipping_library", skipped_library=name)
|
||||
continue
|
||||
|
||||
meta = self.get_library_metadata(repo=name)
|
||||
github_url = f"https://github.com/boostorg/{name}/"
|
||||
if type(meta) is list:
|
||||
for sublibrary in meta:
|
||||
libraries.append(
|
||||
{
|
||||
"name": name,
|
||||
"github_url": github_url,
|
||||
"authors": sublibrary["authors"],
|
||||
"description": sublibrary["description"],
|
||||
"category": sublibrary["category"],
|
||||
"authors": sublibrary["authors"],
|
||||
"maintainers": sublibrary.get("maintainers", []),
|
||||
"cxxstd": sublibrary.get("cxxstd"),
|
||||
}
|
||||
)
|
||||
|
||||
elif type(meta) is dict:
|
||||
libraries.append(
|
||||
{
|
||||
"name": name,
|
||||
"github_url": github_url,
|
||||
"authors": meta["authors"],
|
||||
"description": meta["description"],
|
||||
"category": meta["category"],
|
||||
"authors": meta["authors"],
|
||||
"maintainers": meta.get("maintainers", []),
|
||||
"cxxstd": meta.get("cxxstd"),
|
||||
}
|
||||
)
|
||||
|
||||
return libraries
|
||||
|
||||
def get_library_metadata(self, repo):
|
||||
"""Retrieve library metadata from 'meta/libraries.json'"""
|
||||
@@ -141,14 +198,14 @@ class LibraryUpdater:
|
||||
r = requests.get(url)
|
||||
return r.json()
|
||||
except Exception as e:
|
||||
logger.exception("get_library_metadata_failed", repo=repo, url=url)
|
||||
self.logger.exception("get_library_metadata_failed", repo=repo, url=url)
|
||||
return None
|
||||
|
||||
def update_libraries(self):
|
||||
"""Update all libraries and they metadata"""
|
||||
libs = self.get_library_list()
|
||||
|
||||
logger.info("update_all_libraries_metadata", library_count=len(libs))
|
||||
self.logger.info("update_all_libraries_metadata", library_count=len(libs))
|
||||
|
||||
for lib in libs:
|
||||
self.update_library(lib)
|
||||
@@ -162,21 +219,18 @@ class LibraryUpdater:
|
||||
|
||||
def update_library(self, lib):
|
||||
"""Update an individual library"""
|
||||
logger = logger.bind(lib=lib)
|
||||
logger = self.logger.bind(lib=lib)
|
||||
try:
|
||||
obj, created = Library.objects_get_or_create(name=lib)
|
||||
logger = logger.bind(created=created)
|
||||
obj, created = Library.objects.get_or_create(name=lib["name"])
|
||||
obj.github_url = lib["github_url"]
|
||||
obj.description = lib["description"]
|
||||
|
||||
meta = self.get_library_metadata(lib)
|
||||
if meta is not None:
|
||||
logger = logger.bind(meta=meta)
|
||||
obj.description = meta["description"]
|
||||
# Update categories
|
||||
self.update_categories(obj, categories=lib["category"])
|
||||
|
||||
# Update categories
|
||||
self.update_categories(obj, categories=meta["category"])
|
||||
|
||||
# Save any changes
|
||||
obj.save()
|
||||
# Save any changes
|
||||
logger = logger.bind(obj_created=created)
|
||||
obj.save()
|
||||
|
||||
logger.info("library_udpated")
|
||||
|
||||
|
||||
0
libraries/management/__init__.py
Normal file
0
libraries/management/__init__.py
Normal file
0
libraries/management/commands/__init__.py
Normal file
0
libraries/management/commands/__init__.py
Normal file
9
libraries/management/commands/update_libraries.py
Normal file
9
libraries/management/commands/update_libraries.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import djclick as click
|
||||
|
||||
from libraries.github import LibraryUpdater
|
||||
|
||||
|
||||
@click.command()
def command():
    """Create a LibraryUpdater and run its update_libraries() pass."""
    # A descriptive name replaces the single letter ``l``, which is easily
    # confused with ``1``/``I`` and is flagged by linters (E741).
    updater = LibraryUpdater()
    updater.update_libraries()
|
||||
@@ -11,6 +11,7 @@ gevent
|
||||
gunicorn
|
||||
psycopg2-binary
|
||||
whitenoise
|
||||
django-click
|
||||
|
||||
# Logging
|
||||
django-tracer
|
||||
|
||||
@@ -36,6 +36,10 @@ bumpversion==0.6.0
|
||||
# via -r ./requirements.in
|
||||
celery==5.0.5
|
||||
# via -r ./requirements.in
|
||||
certifi==2022.6.15
|
||||
# via requests
|
||||
charset-normalizer==2.1.0
|
||||
# via requests
|
||||
click==7.1.2
|
||||
# via
|
||||
# black
|
||||
@@ -43,6 +47,7 @@ click==7.1.2
|
||||
# click-didyoumean
|
||||
# click-plugins
|
||||
# click-repl
|
||||
# django-click
|
||||
# pip-tools
|
||||
click-didyoumean==0.0.3
|
||||
# via celery
|
||||
@@ -74,6 +79,8 @@ django-bakery==0.12.7
|
||||
# via -r ./requirements.in
|
||||
django-cache-url==3.2.3
|
||||
# via environs
|
||||
django-click==2.3.0
|
||||
# via -r ./requirements.in
|
||||
django-db-geventpool==4.0.0
|
||||
# via -r ./requirements.in
|
||||
django-extensions==3.1.3
|
||||
@@ -122,6 +129,8 @@ greenlet==1.1.0
|
||||
# via gevent
|
||||
gunicorn==20.1.0
|
||||
# via -r ./requirements.in
|
||||
idna==3.3
|
||||
# via requests
|
||||
iniconfig==1.1.1
|
||||
# via pytest
|
||||
ipython==8.4.0
|
||||
@@ -225,6 +234,8 @@ redis==3.5.3
|
||||
# django-redis
|
||||
regex==2021.4.4
|
||||
# via black
|
||||
requests==2.28.1
|
||||
# via -r ./requirements.in
|
||||
s3transfer==0.4.2
|
||||
# via boto3
|
||||
six==1.16.0
|
||||
@@ -253,7 +264,9 @@ traitlets==5.2.1.post0
|
||||
# ipython
|
||||
# matplotlib-inline
|
||||
urllib3==1.26.4
|
||||
# via botocore
|
||||
# via
|
||||
# botocore
|
||||
# requests
|
||||
vine==5.0.0
|
||||
# via
|
||||
# amqp
|
||||
|
||||
Reference in New Issue
Block a user