From e45745f693fae852ae8dc237b105db838f59d8f4 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 10:03:18 -0800 Subject: [PATCH 01/10] :snowflake: Make sure we're not hitting the GH API in tests --- config/test_settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/test_settings.py b/config/test_settings.py index 73b3419e..1dfdcf2c 100644 --- a/config/test_settings.py +++ b/config/test_settings.py @@ -25,3 +25,5 @@ MIGRATION_MODULES = DisableMigrations() # User a faster password hasher PASSWORD_HASHERS = ["django.contrib.auth.hashers.MD5PasswordHasher"] + +GITHUB_TOKEN = "changeme" From 7f7ea7fdc425488b35924bb2ea2e0354aa25c29a Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 10:03:46 -0800 Subject: [PATCH 02/10] :sparkles: Util function to parse date --- libraries/tests/test_utils.py | 22 ++++++++++++++++++++++ libraries/utils.py | 14 ++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 libraries/tests/test_utils.py create mode 100644 libraries/utils.py diff --git a/libraries/tests/test_utils.py b/libraries/tests/test_utils.py new file mode 100644 index 00000000..3f8b91de --- /dev/null +++ b/libraries/tests/test_utils.py @@ -0,0 +1,22 @@ +from datetime import datetime + +from libraries.utils import parse_date + + +def test_parse_date_iso(): + expected = datetime.now() + result = parse_date(expected.isoformat()) + assert expected == result + + +def test_parse_date_str(): + expected = datetime.now() + input_date = f"{expected.month}-{expected.day}-{expected.year}" + result = parse_date(input_date) + assert expected.date() == result.date() + + +def test_parse_date_str_none(): + expected = None + result = parse_date("") + assert expected == result diff --git a/libraries/utils.py b/libraries/utils.py new file mode 100644 index 00000000..6ba1b1ef --- /dev/null +++ b/libraries/utils.py @@ -0,0 +1,14 @@ +import structlog + +from dateutil.parser import ParserError, parse + +logger = structlog.get_logger() + + +def parse_date(date_str): + """Parses a date string to a datetime. Does not return an error.""" + try: + return parse(date_str) + except ParserError: + logger.info("parse_date_invalid_date", date_str=date_str) + return None From eed160eb68d33a44b94c5b452aa9f5da478dfb81 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 10:48:38 -0800 Subject: [PATCH 03/10] :sparkles: Download all issues for a library from GH --- libraries/github.py | 86 +++++++++++++++---- .../management/commands/update_libraries.py | 4 +- libraries/tests/fixtures.py | 41 +++++++++ libraries/tests/test_github.py | 75 +++++++++++++++- 4 files changed, 186 insertions(+), 20 deletions(-) diff --git a/libraries/github.py b/libraries/github.py index 304aa525..f96f3b32 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -1,13 +1,14 @@ import base64 import os import re - import requests import structlog -from dateutil.parser import ParserError, parse + +from fastcore.xtras import obj2dict from ghapi.all import GhApi, paged -from .models import Category, Library +from .models import Category, Issue, Library +from .utils import parse_date logger = structlog.get_logger() @@ -35,6 +36,9 @@ def repo_issues(owner, repo, state="all", issues_only=True): Note: The GitHub API considers both PRs and Issues to be "Issues" and does not support filtering in the request, so to exclude PRs from the list of issues, we do some manual filtering of the results + + Note: GhApi() returns results as AttrDict objects: + https://fastcore.fast.ai/basics.html#attrdict """ api = get_api() pages = list( @@ -62,7 +66,12 @@ def repo_issues(owner, repo, state="all", issues_only=True): def repo_prs(owner, repo, state="all"): - """Get all PRs for a repo""" + """ + Get all PRs for a repo + + Note: GhApi() returns results as AttrDict objects: + https://fastcore.fast.ai/basics.html#attrdict + """ api = get_api() pages = list( paged( @@ -190,11 +199,7 @@ class LibraryUpdater: meta = self.get_library_metadata(repo=name) github_data = self.get_library_github_data(owner=self.owner, repo=name) - - try: - last_github_update = parse(github_data.get("updated_at")) - except ParserError: - last_github_update = None + last_github_update = parse_date(github_data.get("updated_at", "")) github_url = f"https://github.com/boostorg/{name}/" if type(meta) is list: @@ -260,7 +265,10 @@ class LibraryUpdater: self.logger.info("update_all_libraries_metadata", library_count=len(libs)) for lib in libs: - self.update_library(lib) + library = self.update_library(lib) + # TODO: Test the GH Updater here + github_updater = GithubUpdater(owner=self.owner, library=library) + github_updater.update() def update_categories(self, obj, categories): """Update all of the categories for an object""" @@ -289,6 +297,8 @@ class LibraryUpdater: logger.info("library_udpated") + return obj + except Exception: logger.exception("library_update_failed") @@ -300,13 +310,12 @@ class GithubUpdater: for the site """ - def __init__(self, owner, repo): + def __init__(self, owner="boostorg", library=None): self.owner = owner - self.repo = repo - self.logger = logger.bind(owner=owner, repo=repo) + self.library = library + self.logger = logger.bind(owner=owner, library=library) def update(self): - # FIXME: Write a test self.logger.info("update_github_repo") try: @@ -320,9 +329,54 @@ class GithubUpdater: self.logger.exception("update_prs_error") def update_issues(self): + """Update all issues for a library""" self.logger.info("updating_repo_issues") - issues = repo_issues(self.owner, self.repo, state="all") + + issues_data = repo_issues( + self.owner, self.library.name, state="all", issues_only=True + ) + + for issue_dict in issues_data: + + # Get the date information + closed_at = None + created_at = None + modified_at = None + + if issue_dict.get("closed_at"): + closed_at = parse_date(issue_dict["closed_at"]) + + if issue_dict.get("created_at"): + created_at = parse_date(issue_dict["created_at"]) + + if issue_dict.get("updated_at"): + modified_at = parse_date(issue_dict["updated_at"]) + + # Create or update the Issue object + try: + issue, created = Issue.objects.update_or_create( + library=self.library, + github_id=issue_dict["id"], + defaults={ + "title": issue_dict["title"], + "number": issue_dict["number"], + "is_open": issue_dict["state"] == "open", + "closed": closed_at, + "created": created_at, + "modified": modified_at, + "data": obj2dict(issue_dict), + }, + ) + except Exception: + breakpoint() + continue + logger.info( + "issue_updated_successfully", + issue_id=issue.id, + created_issue=created, + issue_github_id=issue.github_id, + ) def update_prs(self): self.logger.info("updating_repo_prs") - raise ValueError("testing!") + # raise ValueError("testing!") diff --git a/libraries/management/commands/update_libraries.py b/libraries/management/commands/update_libraries.py index 78f21c3f..d460b840 100644 --- a/libraries/management/commands/update_libraries.py +++ b/libraries/management/commands/update_libraries.py @@ -5,5 +5,5 @@ from libraries.github import LibraryUpdater @click.command() def command(): - l = LibraryUpdater() - l.update_libraries() + updater = LibraryUpdater() + updater.update_libraries() diff --git a/libraries/tests/fixtures.py b/libraries/tests/fixtures.py index b6661541..15724074 100644 --- a/libraries/tests/fixtures.py +++ b/libraries/tests/fixtures.py @@ -1,4 +1,5 @@ import pytest +from fastcore.xtras import dict2obj from model_bakery import baker @@ -60,6 +61,46 @@ def github_api_get_repo_response(db): return {"updated_at": "2022-09-14T22:20:38Z"} +@pytest.fixture +def github_api_repo_issues_response(db): + """Returns the response from GhApi().issues.list_for_repo, already paged""" + return [ + dict2obj( + { + "title": "Issue Number One", + "number": 1, + "state": "closed", + "closed_at": "2022-04-11T12:38:24Z", + "created_at": "2022-04-11T11:41:02Z", + "updated_at": "2022-04-11T12:38:25Z", + "id": 5898798798, + } + ), + dict2obj( + { + "title": "Issue Number Two", + "number": 2, + "state": "open", + "closed_at": "2022-04-11T12:38:24Z", + "created_at": "2022-04-11T11:41:02Z", + "updated_at": "2022-04-11T12:38:25Z", + "id": 7395968281, + } + ), + dict2obj( + { + "title": "Issue Number Three", + "number": 3, + "state": "closed", + "closed_at": "2022-04-11T12:38:24Z", + "created_at": "2022-04-11T11:41:02Z", + "updated_at": "2022-04-11T12:38:25Z", + "id": 7492027464, + } + ), + ] + + @pytest.fixture def boost_module(): return {"module": "rational", "url": "rational"} diff --git a/libraries/tests/test_github.py b/libraries/tests/test_github.py index 95be3a08..2fdf788d 100644 --- a/libraries/tests/test_github.py +++ b/libraries/tests/test_github.py @@ -1,10 +1,13 @@ from unittest.mock import patch +import pytest import responses +from dateutil.parser import parse from ghapi.all import GhApi +from model_bakery import baker -from libraries.github import LibraryUpdater, get_api -from libraries.models import Library +from libraries.github import GithubUpdater, LibraryUpdater, get_api +from libraries.models import Issue, Library def test_get_api(): @@ -12,6 +15,74 @@ def test_get_api(): assert isinstance(result, GhApi) +# GithubUpdater tests +@pytest.mark.skip(reason="Method not yet written") +def test_update_prs(): + pass + + +def test_update_issues_new(tp, library, github_api_repo_issues_response): + """GithubUpdater.update_issues()""" + new_issues_count = len(github_api_repo_issues_response) + expected_count = Issue.objects.count() + new_issues_count + with patch("libraries.github.repo_issues") as repo_issues_mock: + updater = GithubUpdater(library=library) + repo_issues_mock.return_value = github_api_repo_issues_response + updater.update_issues() + + assert Issue.objects.count() == expected_count + ids = [issue.id for issue in github_api_repo_issues_response] + issues = Issue.objects.filter(library=library, github_id__in=ids) + assert issues.exists() + assert issues.count() == expected_count + + # Test the values of a sample Issue + gh_issue = github_api_repo_issues_response[0] + issue = issues.get(github_id=gh_issue.id) + assert issue.title == gh_issue.title + assert issue.number == gh_issue.number + if gh_issue.state == "open": + assert issue.is_open + else: + assert not issue.is_open + assert issue.data == gh_issue + + expected_closed = parse(gh_issue["closed_at"]) + expected_created = parse(gh_issue["created_at"]) + expected_modified = parse(gh_issue["updated_at"]) + assert issue.closed == expected_closed + assert issue.created == expected_created + assert issue.modified == expected_modified + + +def test_update_issues_existing(tp, library, github_api_repo_issues_response): + """GithubUpdater.update_issues()""" + existing_issue_data = github_api_repo_issues_response[0] + old_title = "Old title" + issue = baker.make( + Issue, library=library, github_id=existing_issue_data.id, title=old_title + ) + + # Make sure we are expected one fewer new issue, since we created one in advance + new_issues_count = len(github_api_repo_issues_response) + expected_count = Issue.objects.count() + new_issues_count - 1 + + with patch("libraries.github.repo_issues") as repo_issues_mock: + updater = GithubUpdater(library=library) + repo_issues_mock.return_value = github_api_repo_issues_response + updater.update_issues() + + assert Issue.objects.count() == expected_count + ids = [issue.id for issue in github_api_repo_issues_response] + issues = Issue.objects.filter(library=library, github_id__in=ids) + assert issues.exists() + assert issues.count() == expected_count + + # Test that the existing issue updated + issue.refresh_from_db() + assert issue.title == existing_issue_data.title + + # LibraryUpdater tests From cf06cd034d1a7e00b5ef5e1239eaf7ad53d31544 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 15:32:20 -0800 Subject: [PATCH 04/10] :pencil: Retrieve open issues from DB and not from GH API --- libraries/tests/test_views.py | 27 ++++++++++++++++++++++----- libraries/views.py | 11 ++--------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/libraries/tests/test_views.py b/libraries/tests/test_views.py index 19b0a966..735327f0 100644 --- a/libraries/tests/test_views.py +++ b/libraries/tests/test_views.py @@ -1,15 +1,32 @@ -from unittest.mock import patch +from model_bakery import baker def test_library_list(library, tp): + """GET /libraries/""" res = tp.get("libraries") tp.response_200(res) def test_library_detail(library, tp): + """GET /libraries/{repo}/""" url = tp.reverse("library-detail", library.slug) + response = tp.get(url) + tp.response_200(response) - with patch("libraries.views.LibraryDetail.get_open_issues_count") as count_mock: - count_mock.return_value = 21 - res = tp.get(url) - tp.response_200(res) + +def test_library_detail_issues_context(tp, library): + """ + GET /libraries/{repo}/ + Test that the custom context vars appear as expected + """ + # Create open and closed issues for this library, and another random issue + lib2 = baker.make("libraries.Library", slug="sample") + baker.make("libraries.Issue", library=library, is_open=True) + baker.make("libraries.Issue", library=library, is_open=False) + baker.make("libraries.Issue", library=lib2, is_open=True) + url = tp.reverse("library-detail", library.slug) + response = tp.get(url) + tp.response_200(response) + assert "open_issues_count" in response.context + # Verify that the count only includes the one open issue for this library + assert response.context["open_issues_count"] == 1 diff --git a/libraries/views.py b/libraries/views.py index bfa82fea..d4c867eb 100644 --- a/libraries/views.py +++ b/libraries/views.py @@ -1,7 +1,6 @@ from django.views.generic import DetailView, ListView -from .github import repo_issues -from .models import Category, Library +from .models import Category, Issue, Library class CategoryMixin: @@ -64,10 +63,4 @@ class LibraryDetail(CategoryMixin, DetailView): return self.render_to_response(context) def get_open_issues_count(self, obj): - try: - issues = repo_issues( - obj.github_owner, obj.github_repo, state="open", issues_only=True - ) - return len(issues) - except Exception: - return 0 + return Issue.objects.filter(library=obj, is_open=True).count() From e8d4bbd03061afb0dac572cf911313fb2e82e54a Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:02:42 -0800 Subject: [PATCH 05/10] :bug: GH issue titles can be quite long; truncate them --- libraries/github.py | 2 +- libraries/tests/test_github.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/libraries/github.py b/libraries/github.py index f96f3b32..35fd62aa 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -358,7 +358,7 @@ class GithubUpdater: library=self.library, github_id=issue_dict["id"], defaults={ - "title": issue_dict["title"], + "title": issue_dict["title"][:255], "number": issue_dict["number"], "is_open": issue_dict["state"] == "open", "closed": closed_at, diff --git a/libraries/tests/test_github.py b/libraries/tests/test_github.py index 2fdf788d..447dfc6b 100644 --- a/libraries/tests/test_github.py +++ b/libraries/tests/test_github.py @@ -83,6 +83,29 @@ def test_update_issues_existing(tp, library, github_api_repo_issues_response): assert issue.title == existing_issue_data.title +def test_update_issues_long_title(tp, library, github_api_repo_issues_response): + """GithubUpdater.update_issues()""" + new_issues_count = len(github_api_repo_issues_response) + expected_count = Issue.objects.count() + new_issues_count + title = "sample" * 100 + assert len(title) > 255 + expected_title = title[:255] + assert len(expected_title) <= 255 + + with patch("libraries.github.repo_issues") as repo_issues_mock: + updater = GithubUpdater(library=library) + # Make an extra-long title so we can confirm that it saves + github_id = github_api_repo_issues_response[0]["id"] + github_api_repo_issues_response[0]["title"] = "sample" * 100 + repo_issues_mock.return_value = github_api_repo_issues_response + updater.update_issues() + + assert Issue.objects.count() == expected_count + assert Issue.objects.filter(library=library, github_id=github_id).exists() + issue = Issue.objects.get(library=library, github_id=github_id) + assert issue.title == expected_title + + # LibraryUpdater tests From e5120586d33c72e4e96b16c39a693811434b0b04 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:02:50 -0800 Subject: [PATCH 06/10] :books: Update docstring --- libraries/management/commands/update_libraries.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libraries/management/commands/update_libraries.py b/libraries/management/commands/update_libraries.py index d460b840..1337cd9f 100644 --- a/libraries/management/commands/update_libraries.py +++ b/libraries/management/commands/update_libraries.py @@ -5,5 +5,11 @@ from libraries.github import LibraryUpdater @click.command() def command(): + """ + Calls the LibraryUpdater, which retrieves the active boost libraries + from the Boost repo and updates the models in our database with the latest + information on that library (repo) and its issues, pull requests, and related + objects from GitHub. + """ updater = LibraryUpdater() updater.update_libraries() From e9972f0ffae282dcffd65117abd7865dd8c0a6ad Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:03:57 -0800 Subject: [PATCH 07/10] :fire: Remove comment --- libraries/github.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libraries/github.py b/libraries/github.py index 35fd62aa..466aa638 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -266,7 +266,6 @@ class LibraryUpdater: for lib in libs: library = self.update_library(lib) - # TODO: Test the GH Updater here github_updater = GithubUpdater(owner=self.owner, library=library) github_updater.update() From 24d1ac5080080c4d8af11fa4fe7e1950e31d2f73 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:05:23 -0800 Subject: [PATCH 08/10] :fire: remove debugging --- libraries/github.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libraries/github.py b/libraries/github.py index 466aa638..18343ae0 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -367,7 +367,6 @@ class GithubUpdater: }, ) except Exception: - breakpoint() continue logger.info( "issue_updated_successfully", From 5506ea09fb28190df4862897b2ef70c3e73d9ae5 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:09:05 -0800 Subject: [PATCH 09/10] :speaker: Log exception --- libraries/github.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libraries/github.py b/libraries/github.py index 18343ae0..df018079 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -366,8 +366,12 @@ class GithubUpdater: "data": obj2dict(issue_dict), }, ) - except Exception: - continue + except Exception as e: + logger.exception( + "update_issues_cannot_save_issue", + issue_github_id=issue_dict.get("id"), + exc_msg=str(e), + ) logger.info( "issue_updated_successfully", issue_id=issue.id, From 17b5ba68aa6447739c33b3a1e1cea49d2d986179 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 15 Dec 2022 16:09:22 -0800 Subject: [PATCH 10/10] :speaker: Rename the log --- libraries/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/github.py b/libraries/github.py index df018079..f5b18562 100644 --- a/libraries/github.py +++ b/libraries/github.py @@ -368,7 +368,7 @@ class GithubUpdater: ) except Exception as e: logger.exception( - "update_issues_cannot_save_issue", + "update_issues_error_skipped_issue", issue_github_id=issue_dict.get("id"), exc_msg=str(e), )