Fix bug where commit counts for libraries were not being updated monthly as they should have been

- Remove the kwargs from the call to update_libraries
- Add placeholder task for commit counts
- Refactor to simplify the commit count logic and pull it out of the management command and into a task
- Adjust schedule
- Add a separate task to update current month commit counts
- Add CommitData to admin, and add functionality to update the CommitData from the admin via button, like other admin features
This commit is contained in:
Lacey Williams Henschel
2024-02-12 10:07:57 -08:00
committed by Lacey Henschel
parent 4525e8d0a0
commit 468615dad8
8 changed files with 176 additions and 60 deletions

View File

@@ -33,7 +33,21 @@ def setup_periodic_tasks(sender, **kwargs):
app.signature("libraries.tasks.update_libraries"),
)
# Clear the static content database cache. Executs daily at 4:05 AM.
# Update the commit counts for the libraries. Executes daily at 2:05 AM
# Ensures the current month is up to date
sender.add_periodic_task(
crontab(hour=2, minute=5),
app.signature("libraries.tasks.update_current_month_commit_counts"),
)
# Monthly on the first day at 1:05 AM.
# Ensures the prior month commit count is up-to-date as quickly as possible
sender.add_periodic_task(
crontab(hour=1, minute=5, day_of_month=1),
app.signature("libraries.tasks.update_commit_counts"),
)
# Clear the static content database cache. Executes daily at 4:05 AM.
sender.add_periodic_task(
crontab(hour=4, minute=5),
app.signature("core.tasks.clear_static_content_cache"),

View File

@@ -1,9 +1,11 @@
from django.contrib import admin
from django.http import HttpResponseRedirect
from django.urls import path
from django.urls import path, reverse
from django.utils.html import format_html
from .models import Category, Issue, Library, LibraryVersion, PullRequest
from .models import Category, CommitData, Issue, Library, LibraryVersion, PullRequest
from .tasks import (
update_commit_counts,
update_libraries,
update_library_version_documentation_urls_all_versions,
)
@@ -16,6 +18,71 @@ class CategoryAdmin(admin.ModelAdmin):
search_fields = ["name"]
@admin.register(CommitData)
class CommitDataAdmin(admin.ModelAdmin):
    """Admin for per-library, per-month commit counts.

    Adds human-friendly columns for the count and month, a link to the
    related Library's admin change page, and a custom
    ``update_commit_data/`` view that queues the ``update_commit_counts``
    task.
    """

    list_display = (
        "library",
        "commit_count_formatted",
        "month_year_formatted",
        "branch",
        "library_link",
    )
    list_filter = ("library__name", "branch", "month_year")
    search_fields = ("library__name", "branch")
    date_hierarchy = "month_year"
    ordering = ("library__name", "-month_year")
    autocomplete_fields = ["library"]
    change_list_template = "admin/commit_data_change_list.html"

    def commit_count_formatted(self, obj):
        """Return the commit count with thousands separators (e.g. 1,234)."""
        return f"{obj.commit_count:,}"

    commit_count_formatted.admin_order_field = "commit_count"
    commit_count_formatted.short_description = "Commit Count"

    def month_year_formatted(self, obj):
        """Return the month as full month name plus year (e.g. January 2024)."""
        return obj.month_year.strftime("%B %Y")

    month_year_formatted.admin_order_field = "month_year"
    month_year_formatted.short_description = "Month/Year"

    def library_link(self, obj):
        """Return an HTML link to the admin change page of the row's library."""
        return format_html(
            '<a href="{}">{}</a>',
            reverse("admin:libraries_library_change", args=(obj.library.pk,)),
            obj.library.name,
        )

    library_link.short_description = "Library Details"

    def formfield_for_foreignkey(self, db_field, request, **kwargs):
        """Offer libraries sorted by name when choosing the ``library`` FK."""
        if db_field.name == "library":
            kwargs["queryset"] = Library.objects.order_by("name")
        return super().formfield_for_foreignkey(db_field, request, **kwargs)

    def get_urls(self):
        """Return the admin URLs with the custom update route placed first."""
        custom_urls = [
            path(
                "update_commit_data/",
                # Wrap with admin_view() so the admin site's login/permission
                # check applies; a bare bound method would let any visitor
                # queue the task.
                self.admin_site.admin_view(self.update_commit_data),
                name="update_commit_data",
            ),
        ]
        # Custom routes go first so the default admin patterns cannot
        # shadow them.
        return custom_urls + super().get_urls()

    def update_commit_data(self, request):
        """Queue the commit-count refresh task and return to the change list."""
        update_commit_counts.delay()
        self.message_user(
            request,
            """
            Commit data is being refreshed.
            """,
        )
        return HttpResponseRedirect("../")
@admin.register(Library)
class LibraryAdmin(admin.ModelAdmin):
list_display = ["name", "key", "github_url"]
@@ -33,7 +100,7 @@ class LibraryAdmin(admin.ModelAdmin):
def update_libraries(self, request):
"""Run the task to refresh the library data from GitHub"""
update_libraries.delay(update_all=True)
update_libraries.delay()
self.message_user(
request,
"""

View File

@@ -1,5 +1,7 @@
import structlog
from dateutil.relativedelta import relativedelta
from django.contrib.auth import get_user_model
from django.utils import timezone
from fastcore.xtras import obj2dict
from core.githubhelper import GithubAPIClient, GithubDataParser
@@ -13,6 +15,15 @@ logger = structlog.get_logger()
User = get_user_model()
# NOTE(review): everything below is evaluated once, when this module is
# imported. In a long-running process the values are not refreshed, so they
# reflect the import date rather than the date a caller actually runs.
now = timezone.now()
# Midnight on the first day of the current month, one year earlier.
FIRST_OF_MONTH_ONE_YEAR_AGO = timezone.make_aware(
    timezone.datetime(year=now.year - 1, month=now.month, day=1)
)
# NOTE(review): despite the name, this is the first of the current month
# minus one day, i.e. midnight at the start of the LAST day of the PREVIOUS
# month. Whether commits made later on that day fall inside an "until" bound
# built from this value depends on the consumer -- confirm.
FIRST_OF_CURRENT_MONTH = timezone.make_aware(
    timezone.datetime(year=now.year, month=now.month, day=1)
) - relativedelta(days=1)
class LibraryUpdater:
"""
This class is used to sync Libraries from the list of git submodules
@@ -88,9 +99,7 @@ class LibraryUpdater:
return libraries
def update_libraries(
self,
):
def update_libraries(self):
"""
Update all libraries with the metadata from their libraries.json file.
"""
@@ -211,23 +220,34 @@ class LibraryUpdater:
self.logger.info(f"User {user.email} added as a maintainer of {obj}")
def update_monthly_commit_counts(
self, obj: Library, branch: str = "master", since=None, until=None
self,
branch: str = "master",
since=FIRST_OF_MONTH_ONE_YEAR_AGO,
until=FIRST_OF_CURRENT_MONTH,
):
"""Update the monthly commit data for a library.
"""Update the monthly commit data for all libraries
:param obj: Library object
:param commit_data: Dictionary of commit data, as output by the parser's
get_commits_per_month method.
:param branch: Branch to update commit data for. Defaults to "master".
:param since: Year to update commit data for. Defaults to a year ago
:param until: Year to update commit data for. Defaults to present year
Note: Overrides CommitData objects for the library; does not increment
the count.
"""
if not obj.github_repo:
self.logger.info("updating_monthly_commit_data_skipped_no_repo")
return
self.logger.info("updating_monthly_commit_data")
for library in Library.objects.all():
self.update_monthly_commit_counts_for_library(
library, branch=branch, since=since, until=until
)
def update_monthly_commit_counts_for_library(
self,
obj,
branch: str = "master",
since=FIRST_OF_MONTH_ONE_YEAR_AGO,
until=FIRST_OF_CURRENT_MONTH,
):
"""Update the commit counts for a specific library."""
commits = self.client.get_commits(
repo_slug=obj.github_repo, branch=branch, since=since, until=until
)

View File

@@ -1,48 +1,16 @@
import djclick as click
from django.utils import timezone
from dateutil.relativedelta import relativedelta
from libraries.github import LibraryUpdater
from core.githubhelper import GithubAPIClient
from libraries.models import Library
from libraries.tasks import update_commit_counts
@click.command()
@click.option("--token", is_flag=False, help="Github API token")
@click.option(
"--branch", is_flag=False, help="Branch name (default master)", default="master"
)
def command(branch, token):
def command(token):
"""Imports commit counts for all libraries, broken down by month, and saves
them to the database. This is a one-time import.
:param branch: The branch to import commit data from. Default is master.
:param token: Github API token
"""
click.secho("Importing library commit history...", fg="green")
client = GithubAPIClient(token=token)
updater = LibraryUpdater(client=client)
# We import the most recent year of commit history
now = timezone.now()
# Set 'since' to the first day of the month, 12 months ago
since = timezone.make_aware(
timezone.datetime(year=now.year - 1, month=now.month, day=1)
)
# Set 'until' to the last day of last month
until = timezone.make_aware(
timezone.datetime(year=now.year, month=now.month, day=1)
) - relativedelta(days=1)
for library in Library.objects.all():
updater.update_monthly_commit_counts(
library, branch=branch, since=since, until=until
)
library.refresh_from_db()
click.secho(
f"Updated {library.name} commits; {library.commit_data.count()} monthly "
f"counts added",
fg="green",
)
update_commit_counts(token=token)
click.secho("Finished importing library commit history", fg="green")

View File

@@ -0,0 +1,16 @@
# Generated by Django 4.2.2 on 2024-02-12 19:23
from django.db import migrations
class Migration(migrations.Migration):
    """Set the plural display name of the ``commitdata`` model.

    Only model options are altered, via ``AlterModelOptions``.
    """

    # Must be applied after the 0017 merge migration of the libraries app.
    dependencies = [
        ("libraries", "0017_merge_20230919_2029"),
    ]

    operations = [
        # Sets the plural label to "Commit Data".
        migrations.AlterModelOptions(
            name="commitdata",
            options={"verbose_name_plural": "Commit Data"},
        ),
    ]

View File

@@ -61,6 +61,7 @@ class CommitData(models.Model):
class Meta:
unique_together = ("library", "month_year", "branch")
verbose_name_plural = "Commit Data"
def __str__(self):
return (

View File

@@ -1,12 +1,13 @@
import structlog
from dateutil.relativedelta import relativedelta
from config.celery import app
from django.db.models import Q
from django.utils import timezone
from core.boostrenderer import get_content_from_s3
from core.htmlhelper import get_library_documentation_urls
from libraries.github import LibraryUpdater
from libraries.models import LibraryVersion
from libraries.utils import get_first_last_day_last_month
from versions.models import Version
from .utils import (
generate_library_docs_url,
@@ -317,7 +318,7 @@ def get_and_store_library_version_documentation_urls_for_version(version_pk):
@app.task
def update_libraries(update_all=False):
def update_libraries():
"""Update local libraries from GitHub Boost libraries.
Use the LibraryUpdater, which retrieves the active boost libraries from the
@@ -326,10 +327,28 @@ def update_libraries(update_all=False):
from GitHub.
"""
updater = LibraryUpdater()
if update_all:
updater.update_libraries()
logger.info("libraries_tasks_update_all_libraries_finished")
else:
since, until = get_first_last_day_last_month()
updater.update_libraries(since=since, until=until)
logger.info("libraries_tasks_update_libraries_finished")
updater.update_libraries()
logger.info("libraries_tasks_update_all_libraries_finished")
@app.task
def update_commit_counts(token=None):
    """Refresh the stored monthly commit counts for every library.

    The date range and branch are not passed here, so LibraryUpdater's own
    defaults for ``update_monthly_commit_counts`` apply.

    :param token: Optional GitHub API token handed to LibraryUpdater.
    """
    LibraryUpdater(token=token).update_monthly_commit_counts()
    logger.info("libraries_update_commit_counts_finished")
@app.task
def update_current_month_commit_counts(token=None):
    """Import commit counts for all libraries for the current month.

    The window runs from midnight on the first day of the current month up
    to the moment the task executes.

    :param token: Optional GitHub API token handed to LibraryUpdater.
    """
    updater = LibraryUpdater(token=token)
    now = timezone.now()
    # Start exactly at the first of this month. Backing up a day would pull
    # the previous month's final day into the window; because the updater is
    # documented as overriding stored counts rather than incrementing them,
    # that could replace the previous month's total with one day's commits.
    since = timezone.make_aware(
        timezone.datetime(year=now.year, month=now.month, day=1)
    )
    updater.update_monthly_commit_counts(since=since, until=now)
    logger.info("libraries_update_current_month_commit_counts_finished")

View File

@@ -0,0 +1,11 @@
{% extends "admin/change_list.html" %}
{% load i18n admin_urls %}
{# Extends the stock admin change list and adds one extra object-tools entry. #}
{% block object-tools %}
<ul class="object-tools">
{% block object-tools-items %}
{# Keep the parent's object-tools items, then append the custom link. #}
{{ block.super }}
{# Links to the URL named "update_commit_data" in the admin namespace. #}
<li><a href="{% url 'admin:update_commit_data' %}" class="addlink">{% trans "Update Commit Data" %}</a></li>
{% endblock %}
</ul>
{% endblock %}