import base64
import os
import re

import requests
import structlog
from fastcore.xtras import obj2dict
from ghapi.all import GhApi, paged

from .models import Category, Issue, Library
from .utils import parse_date

logger = structlog.get_logger()

# Seconds to wait for raw.githubusercontent.com before giving up on a library.
_METADATA_REQUEST_TIMEOUT = 30

# Patterns used by parse_submodules(); compiled once at import time.
_SUBMODULE_RE = re.compile(r"^\[submodule \"(.*)\"\]$")
_SUBMODULE_URL_RE = re.compile(r"^\s*url\s*\=\s*\.\.\/(.*)\.git\s*$")


def get_api():
    """
    Return a GH API object, using a GITHUB_TOKEN from the environment if it
    exists
    """
    token = os.environ.get("GITHUB_TOKEN", None)
    return GhApi(token=token)


def get_repo(api, owner, repo):
    """
    Return the response from GitHub's /repos/{owner}/{repo}
    """
    return api.repos.get(owner=owner, repo=repo)


def repo_issues(owner, repo, state="all", issues_only=True):
    """
    Get all issues for a repo.

    Note: The GitHub API considers both PRs and Issues to be "Issues" and does
    not support filtering in the request, so to exclude PRs from the list of
    issues, we do some manual filtering of the results

    Note: GhApi() returns results as AttrDict objects:
    https://fastcore.fast.ai/basics.html#attrdict

    When `issues_only` is true, any result carrying a truthy "pull_request"
    entry is dropped; otherwise every result is returned.
    """
    api = get_api()
    pages = paged(
        api.issues.list_for_repo,
        owner=owner,
        repo=repo,
        state=state,
        per_page=100,
    )

    # Concatenate all pages into a single list
    all_results = [result for page in pages for result in page]

    if not issues_only:
        return all_results
    return [result for result in all_results if not result.get("pull_request")]


def repo_prs(owner, repo, state="all"):
    """
    Get all PRs for a repo

    Note: GhApi() returns results as AttrDict objects:
    https://fastcore.fast.ai/basics.html#attrdict
    """
    api = get_api()
    pages = paged(
        api.pulls.list,
        owner=owner,
        repo=repo,
        state=state,
        per_page=100,
    )

    # Concatenate all pages into a single list
    return [result for page in pages for result in page]


def update_all_repos_info():
    """Update all of our repos information from github"""
    # FIXME: Write this function
    logger.info("update_all_github_repos")


def parse_submodules(content):
    """
    Parse the text of a `.gitmodules` file into a list of dicts.

    Expects the multiline contents of
    https://github.com/boostorg/boost/.gitmodules to be passed in.

    Returns one `{"module": <name>, "url": <repo>}` dict per submodule, where
    `<repo>` is the part of a `url = ../<repo>.git` line between `../` and
    `.git`. A submodule header with no matching `url` line is dropped, and a
    `url` line that does not follow a header is ignored.
    """
    modules = []
    current_submodule = None

    # splitlines() also strips "\r", so CRLF content still matches the
    # "$"-anchored patterns.
    for line in content.splitlines():
        sub_m = _SUBMODULE_RE.match(line)
        if sub_m:
            current_submodule = {"module": sub_m.group(1)}
            continue

        url_m = _SUBMODULE_URL_RE.match(line)
        # Only attach a URL to a header we have actually seen; a stray or
        # repeated url line would otherwise blow up on None.
        if url_m and current_submodule is not None:
            current_submodule["url"] = url_m.group(1)
            modules.append(current_submodule)
            current_submodule = None

    return modules


class LibraryUpdater:
    """
    This class is used to sync Libraries from the list of git submodules
    and their `libraries.json` file metadata.
    """

    def __init__(self, owner="boostorg"):
        self.api = get_api()
        self.owner = owner
        self.logger = structlog.get_logger()

        # Modules we need to skip as they are not really Boost Libraries
        self.skip_modules = [
            "inspect",
            "boostbook",
            "bcp",
            "build",
            "quickbook",
            "litre",
            "auto_index",
            "boostdep",
            "check_build",
            "headers",
            "boost_install",
            "docca",
            "cmake",
            "more",
        ]

    def get_ref(self, repo, ref):
        """Get a particular ref of a particular repo"""
        return self.api.git.get_ref(owner=self.owner, repo=repo, ref=ref)

    def get_boost_ref(self):
        """Retrieve the latest commit to master for boostorg/boost repo"""
        return self.get_ref(repo="boost", ref="heads/master")

    @staticmethod
    def _build_library(name, github_url, meta, last_github_update):
        """Build one library dict from a single `libraries.json` entry."""
        return {
            "name": name,
            "github_url": github_url,
            "authors": meta["authors"],
            "description": meta["description"],
            "category": meta["category"],
            "maintainers": meta.get("maintainers", []),
            "cxxstd": meta.get("cxxstd"),
            "last_github_update": last_github_update,
        }

    def get_library_list(self):
        """
        Determine our list of libraries from .gitmodules and sub-repo
        libraries.json files

        Returns a list of dicts with the keys "name", "github_url", "authors",
        "description", "category", "maintainers", "cxxstd" and
        "last_github_update". Returns an empty list (after logging an error)
        when the boost repo has no top-level `.gitmodules` file.
        """
        # Find our latest .gitmodules
        ref = self.get_boost_ref()
        tree_sha = ref["object"]["sha"]

        # Get all the top-level elements of the main Boost repo
        top_level_files = self.api.git.get_tree(
            owner=self.owner, repo="boost", tree_sha=tree_sha
        )

        gitmodules = None
        for item in top_level_files["tree"]:
            # We're only looking for the `.gitmodules` file, so skip
            # everything else
            if item["path"] != ".gitmodules":
                continue

            blob = self.api.git.get_blob(
                owner=self.owner, repo="boost", file_sha=item["sha"]
            )
            gitmodules = base64.b64decode(blob["content"])
            break

        if gitmodules is None:
            # Without .gitmodules there is nothing to enumerate.
            self.logger.error("gitmodules_not_found", tree_sha=tree_sha)
            return []

        # Parse the content of the .gitmodules file into a list of dicts with
        # the info we need
        modules = parse_submodules(gitmodules.decode("utf-8"))

        # Parse the module data into libraries. Most libraries are individual
        # repositories, but a few such as "system", "functional", and others
        # contain multiple libraries
        libraries = []
        for module in modules:
            name = module["module"]

            if name in self.skip_modules:
                self.logger.info("skipping_library", skipped_library=name)
                continue

            meta = self.get_library_metadata(repo=name)
            if isinstance(meta, dict):
                entries = [meta]
            elif isinstance(meta, list):
                entries = meta
            else:
                # Metadata fetch failed (already logged) or has an unexpected
                # shape; skip before spending a GitHub API call on it.
                continue

            github_data = self.get_library_github_data(owner=self.owner, repo=name)
            last_github_update = parse_date(github_data.get("updated_at", ""))
            github_url = f"https://github.com/{self.owner}/{name}/"

            # NOTE(review): every sub-library of a multi-library repo gets the
            # repository name here, so update_library() will see the same name
            # several times. GithubUpdater relies on the name being the repo
            # slug, so this is left unchanged - confirm the intended behavior.
            libraries.extend(
                self._build_library(name, github_url, entry, last_github_update)
                for entry in entries
            )

        return libraries

    def get_library_metadata(self, repo):
        """
        Retrieve library metadata from 'meta/libraries.json'

        Each Boost library will have a `meta` directory with a
        `libraries.json` file. Example:
        https://github.com/boostorg/align/blob/5ad7df63cd792fbdb801d600b93cad1a432f0151/meta/libraries.json

        Returns the decoded JSON, or None (after logging the exception) when
        the request fails, times out, returns an error status, or the body is
        not valid JSON.
        """
        url = f"https://raw.githubusercontent.com/{self.owner}/{repo}/develop/meta/libraries.json"

        try:
            response = requests.get(url, timeout=_METADATA_REQUEST_TIMEOUT)
            # Don't mistake an error response for library metadata.
            response.raise_for_status()
            return response.json()
        except Exception:
            self.logger.exception("get_library_metadata_failed", repo=repo, url=url)
            return None

    def get_library_github_data(self, owner, repo):
        """
        Retrieve other data about the library from the GitHub API
        """
        return get_repo(self.api, owner, repo)

    def update_libraries(self):
        """
        Update all libraries with the metadata, then sync each library's
        GitHub data. Libraries that fail to save are skipped.
        """
        libs = self.get_library_list()
        self.logger.info("update_all_libraries_metadata", library_count=len(libs))

        for lib in libs:
            library = self.update_library(lib)
            if library is None:
                # update_library() already logged the failure; there is no
                # Library object to attach GitHub data to.
                continue

            github_updater = GithubUpdater(owner=self.owner, library=library)
            github_updater.update()

    def update_categories(self, obj, categories):
        """Update all of the categories for an object"""
        obj.categories.clear()
        for cat_name in categories:
            cat, _ = Category.objects.get_or_create(name=cat_name)
            obj.categories.add(cat)

    def update_library(self, lib):
        """
        Update an individual library

        Returns the saved Library object, or None if anything went wrong (the
        exception is logged, not raised).
        """
        log = self.logger.bind(lib=lib)

        try:
            obj, created = Library.objects.update_or_create(name=lib["name"])
            obj.github_url = lib["github_url"]
            obj.description = lib["description"]
            obj.cpp_standard_minimum = lib["cxxstd"]
            obj.last_github_update = lib["last_github_update"]

            # Update categories
            self.update_categories(obj, categories=lib["category"])

            # Save any changes
            log = log.bind(obj_created=created)
            obj.save()

            log.info("library_udpated")

            return obj
        except Exception:
            log.exception("library_update_failed")
            return None


class GithubUpdater:
    """
    We will instantiate an instance of this class for each Library. Running
    the `update()` method will update all Github related information we need
    for the site
    """

    def __init__(self, owner="boostorg", library=None):
        self.owner = owner
        self.library = library
        self.logger = logger.bind(owner=owner, library=library)

    def update(self):
        """
        Run the issue and PR updates; a failure in one is logged and does not
        prevent the other from running.
        """
        self.logger.info("update_github_repo")

        try:
            self.update_issues()
        except Exception:
            self.logger.exception("update_issues_error")

        try:
            self.update_prs()
        except Exception:
            self.logger.exception("update_prs_error")

    def update_issues(self):
        """
        Update all issues for a library

        An issue that fails to save is logged and skipped; the remaining
        issues are still processed.
        """
        self.logger.info("updating_repo_issues")

        issues_data = repo_issues(
            self.owner, self.library.name, state="all", issues_only=True
        )

        for issue_dict in issues_data:
            # Get the date information; missing or empty timestamps become None
            closed_at, created_at, modified_at = (
                parse_date(issue_dict[key]) if issue_dict.get(key) else None
                for key in ("closed_at", "created_at", "updated_at")
            )

            # Create or update the Issue object
            try:
                issue, created = Issue.objects.update_or_create(
                    library=self.library,
                    github_id=issue_dict["id"],
                    defaults={
                        "title": issue_dict["title"][:255],
                        "number": issue_dict["number"],
                        "is_open": issue_dict["state"] == "open",
                        "closed": closed_at,
                        "created": created_at,
                        "modified": modified_at,
                        "data": obj2dict(issue_dict),
                    },
                )
            except Exception as e:
                self.logger.exception(
                    "update_issues_error_skipped_issue",
                    issue_github_id=issue_dict.get("id"),
                    exc_msg=str(e),
                )
            else:
                # Only report success when the save succeeded, so `issue` and
                # `created` are guaranteed to refer to this iteration.
                self.logger.info(
                    "issue_updated_successfully",
                    issue_id=issue.id,
                    created_issue=created,
                    issue_github_id=issue.github_id,
                )

    def update_prs(self):
        """Placeholder for PR syncing; currently it only logs."""
        self.logger.info("updating_repo_prs")