Merge pull request #72 from revsys/64-load-issues

Import issues from GitHub for all libraries
This commit is contained in:
Lacey Williams Henschel
2022-12-16 09:52:50 -08:00
committed by GitHub
9 changed files with 279 additions and 34 deletions

View File

@@ -25,3 +25,5 @@ MIGRATION_MODULES = DisableMigrations()
# User a faster password hasher
PASSWORD_HASHERS = ["django.contrib.auth.hashers.MD5PasswordHasher"]
GITHUB_TOKEN = "changeme"

View File

@@ -1,13 +1,14 @@
import base64
import os
import re
import requests
import structlog
from dateutil.parser import ParserError, parse
from fastcore.xtras import obj2dict
from ghapi.all import GhApi, paged
from .models import Category, Library
from .models import Category, Issue, Library
from .utils import parse_date
logger = structlog.get_logger()
@@ -35,6 +36,9 @@ def repo_issues(owner, repo, state="all", issues_only=True):
Note: The GitHub API considers both PRs and Issues to be "Issues" and does not
support filtering in the request, so to exclude PRs from the list of issues, we
do some manual filtering of the results
Note: GhApi() returns results as AttrDict objects:
https://fastcore.fast.ai/basics.html#attrdict
"""
api = get_api()
pages = list(
@@ -62,7 +66,12 @@ def repo_issues(owner, repo, state="all", issues_only=True):
def repo_prs(owner, repo, state="all"):
"""Get all PRs for a repo"""
"""
Get all PRs for a repo
Note: GhApi() returns results as AttrDict objects:
https://fastcore.fast.ai/basics.html#attrdict
"""
api = get_api()
pages = list(
paged(
@@ -190,11 +199,7 @@ class LibraryUpdater:
meta = self.get_library_metadata(repo=name)
github_data = self.get_library_github_data(owner=self.owner, repo=name)
try:
last_github_update = parse(github_data.get("updated_at"))
except ParserError:
last_github_update = None
last_github_update = parse_date(github_data.get("updated_at", ""))
github_url = f"https://github.com/boostorg/{name}/"
if type(meta) is list:
@@ -260,7 +265,9 @@ class LibraryUpdater:
self.logger.info("update_all_libraries_metadata", library_count=len(libs))
for lib in libs:
self.update_library(lib)
library = self.update_library(lib)
github_updater = GithubUpdater(owner=self.owner, library=library)
github_updater.update()
def update_categories(self, obj, categories):
"""Update all of the categories for an object"""
@@ -289,6 +296,8 @@ class LibraryUpdater:
logger.info("library_udpated")
return obj
except Exception:
logger.exception("library_update_failed")
@@ -300,13 +309,12 @@ class GithubUpdater:
for the site
"""
def __init__(self, owner, repo):
def __init__(self, owner="boostorg", library=None):
self.owner = owner
self.repo = repo
self.logger = logger.bind(owner=owner, repo=repo)
self.library = library
self.logger = logger.bind(owner=owner, library=library)
def update(self):
# FIXME: Write a test
self.logger.info("update_github_repo")
try:
@@ -320,9 +328,57 @@ class GithubUpdater:
self.logger.exception("update_prs_error")
def update_issues(self):
"""Update all issues for a library"""
self.logger.info("updating_repo_issues")
issues = repo_issues(self.owner, self.repo, state="all")
issues_data = repo_issues(
self.owner, self.library.name, state="all", issues_only=True
)
for issue_dict in issues_data:
# Get the date information
closed_at = None
created_at = None
modified_at = None
if issue_dict.get("closed_at"):
closed_at = parse_date(issue_dict["closed_at"])
if issue_dict.get("created_at"):
created_at = parse_date(issue_dict["created_at"])
if issue_dict.get("updated_at"):
modified_at = parse_date(issue_dict["updated_at"])
# Create or update the Issue object
try:
issue, created = Issue.objects.update_or_create(
library=self.library,
github_id=issue_dict["id"],
defaults={
"title": issue_dict["title"][:255],
"number": issue_dict["number"],
"is_open": issue_dict["state"] == "open",
"closed": closed_at,
"created": created_at,
"modified": modified_at,
"data": obj2dict(issue_dict),
},
)
except Exception as e:
logger.exception(
"update_issues_error_skipped_issue",
issue_github_id=issue_dict.get("id"),
exc_msg=str(e),
)
logger.info(
"issue_updated_successfully",
issue_id=issue.id,
created_issue=created,
issue_github_id=issue.github_id,
)
def update_prs(self):
self.logger.info("updating_repo_prs")
raise ValueError("testing!")
# raise ValueError("testing!")

View File

@@ -5,5 +5,11 @@ from libraries.github import LibraryUpdater
@click.command()
def command():
l = LibraryUpdater()
l.update_libraries()
"""
Calls the LibraryUpdater, which retrieves the active boost libraries
from the Boost repo and updates the models in our database with the latest
information on that library (repo) and its issues, pull requests, and related
objects from GitHub.
"""
updater = LibraryUpdater()
updater.update_libraries()

View File

@@ -1,4 +1,5 @@
import pytest
from fastcore.xtras import dict2obj
from model_bakery import baker
@@ -60,6 +61,46 @@ def github_api_get_repo_response(db):
return {"updated_at": "2022-09-14T22:20:38Z"}
@pytest.fixture
def github_api_repo_issues_response(db):
"""Returns the response from GhApi().issues.list_for_repo, already paged"""
return [
dict2obj(
{
"title": "Issue Number One",
"number": 1,
"state": "closed",
"closed_at": "2022-04-11T12:38:24Z",
"created_at": "2022-04-11T11:41:02Z",
"updated_at": "2022-04-11T12:38:25Z",
"id": 5898798798,
}
),
dict2obj(
{
"title": "Issue Number Two",
"number": 2,
"state": "open",
"closed_at": "2022-04-11T12:38:24Z",
"created_at": "2022-04-11T11:41:02Z",
"updated_at": "2022-04-11T12:38:25Z",
"id": 7395968281,
}
),
dict2obj(
{
"title": "Issue Number Three",
"number": 3,
"state": "closed",
"closed_at": "2022-04-11T12:38:24Z",
"created_at": "2022-04-11T11:41:02Z",
"updated_at": "2022-04-11T12:38:25Z",
"id": 7492027464,
}
),
]
@pytest.fixture
def boost_module():
return {"module": "rational", "url": "rational"}

View File

@@ -1,10 +1,13 @@
from unittest.mock import patch
import pytest
import responses
from dateutil.parser import parse
from ghapi.all import GhApi
from model_bakery import baker
from libraries.github import LibraryUpdater, get_api
from libraries.models import Library
from libraries.github import GithubUpdater, LibraryUpdater, get_api
from libraries.models import Issue, Library
def test_get_api():
@@ -12,6 +15,97 @@ def test_get_api():
assert isinstance(result, GhApi)
# GithubUpdater tests
@pytest.mark.skip(reason="Method not yet written")
def test_update_prs():
pass
def test_update_issues_new(tp, library, github_api_repo_issues_response):
"""GithubUpdater.update_issues()"""
new_issues_count = len(github_api_repo_issues_response)
expected_count = Issue.objects.count() + new_issues_count
with patch("libraries.github.repo_issues") as repo_issues_mock:
updater = GithubUpdater(library=library)
repo_issues_mock.return_value = github_api_repo_issues_response
updater.update_issues()
assert Issue.objects.count() == expected_count
ids = [issue.id for issue in github_api_repo_issues_response]
issues = Issue.objects.filter(library=library, github_id__in=ids)
assert issues.exists()
assert issues.count() == expected_count
# Test the values of a sample Issue
gh_issue = github_api_repo_issues_response[0]
issue = issues.get(github_id=gh_issue.id)
assert issue.title == gh_issue.title
assert issue.number == gh_issue.number
if gh_issue.state == "open":
assert issue.is_open
else:
assert not issue.is_open
assert issue.data == gh_issue
expected_closed = parse(gh_issue["closed_at"])
expected_created = parse(gh_issue["created_at"])
expected_modified = parse(gh_issue["updated_at"])
assert issue.closed == expected_closed
assert issue.created == expected_created
assert issue.modified == expected_modified
def test_update_issues_existing(tp, library, github_api_repo_issues_response):
"""GithubUpdater.update_issues()"""
existing_issue_data = github_api_repo_issues_response[0]
old_title = "Old title"
issue = baker.make(
Issue, library=library, github_id=existing_issue_data.id, title=old_title
)
# Make sure we are expected one fewer new issue, since we created one in advance
new_issues_count = len(github_api_repo_issues_response)
expected_count = Issue.objects.count() + new_issues_count - 1
with patch("libraries.github.repo_issues") as repo_issues_mock:
updater = GithubUpdater(library=library)
repo_issues_mock.return_value = github_api_repo_issues_response
updater.update_issues()
assert Issue.objects.count() == expected_count
ids = [issue.id for issue in github_api_repo_issues_response]
issues = Issue.objects.filter(library=library, github_id__in=ids)
assert issues.exists()
assert issues.count() == expected_count
# Test that the existing issue updated
issue.refresh_from_db()
assert issue.title == existing_issue_data.title
def test_update_issues_long_title(tp, library, github_api_repo_issues_response):
"""GithubUpdater.update_issues()"""
new_issues_count = len(github_api_repo_issues_response)
expected_count = Issue.objects.count() + new_issues_count
title = "sample" * 100
assert len(title) > 255
expected_title = title[:255]
assert len(expected_title) <= 255
with patch("libraries.github.repo_issues") as repo_issues_mock:
updater = GithubUpdater(library=library)
# Make an extra-long title so we can confirm that it saves
github_id = github_api_repo_issues_response[0]["id"]
github_api_repo_issues_response[0]["title"] = "sample" * 100
repo_issues_mock.return_value = github_api_repo_issues_response
updater.update_issues()
assert Issue.objects.count() == expected_count
assert Issue.objects.filter(library=library, github_id=github_id).exists()
issue = Issue.objects.get(library=library, github_id=github_id)
assert issue.title == expected_title
# LibraryUpdater tests

View File

@@ -0,0 +1,22 @@
from datetime import datetime
from libraries.utils import parse_date
def test_parse_date_iso():
expected = datetime.now()
result = parse_date(expected.isoformat())
assert expected == result
def test_parse_date_str():
expected = datetime.now()
input_date = f"{expected.month}-{expected.day}-{expected.year}"
result = parse_date(input_date)
assert expected.date() == result.date()
def test_parse_date_str_none():
expected = None
result = parse_date("")
assert expected == result

View File

@@ -1,15 +1,32 @@
from unittest.mock import patch
from model_bakery import baker
def test_library_list(library, tp):
"""GET /libraries/"""
res = tp.get("libraries")
tp.response_200(res)
def test_library_detail(library, tp):
"""GET /libraries/{repo}/"""
url = tp.reverse("library-detail", library.slug)
response = tp.get(url)
tp.response_200(response)
with patch("libraries.views.LibraryDetail.get_open_issues_count") as count_mock:
count_mock.return_value = 21
res = tp.get(url)
tp.response_200(res)
def test_library_detail_issues_context(tp, library):
"""
GET /libraries/{repo}/
Test that the custom context vars appear as expected
"""
# Create open and closed issues for this library, and another random issue
lib2 = baker.make("libraries.Library", slug="sample")
baker.make("libraries.Issue", library=library, is_open=True)
baker.make("libraries.Issue", library=library, is_open=False)
baker.make("libraries.Issue", library=lib2, is_open=True)
url = tp.reverse("library-detail", library.slug)
response = tp.get(url)
tp.response_200(response)
assert "open_issues_count" in response.context
# Verify that the count only includes the one open issue for this library
assert response.context["open_issues_count"] == 1

14
libraries/utils.py Normal file
View File

@@ -0,0 +1,14 @@
import structlog
from dateutil.parser import ParserError, parse
logger = structlog.get_logger()
def parse_date(date_str):
"""Parses a date string to a datetime. Does not return an error."""
try:
return parse(date_str)
except ParserError:
logger.info("parse_date_invalid_date", date_str=date_str)
return None

View File

@@ -1,7 +1,6 @@
from django.views.generic import DetailView, ListView
from .github import repo_issues
from .models import Category, Library
from .models import Category, Issue, Library
class CategoryMixin:
@@ -64,10 +63,4 @@ class LibraryDetail(CategoryMixin, DetailView):
return self.render_to_response(context)
def get_open_issues_count(self, obj):
try:
issues = repo_issues(
obj.github_owner, obj.github_repo, state="open", issues_only=True
)
return len(issues)
except Exception:
return 0
return Issue.objects.filter(library=obj, is_open=True).count()