Release reports refactor (#1996) (#1999)

Co-authored-by: Greg Kaleka <greg@gregkaleka.com>
This commit is contained in:
daveoconnor
2025-12-01 10:27:05 -08:00
committed by GitHub
parent 61c651c665
commit 69a652d066
19 changed files with 1160 additions and 740 deletions

View File

@@ -98,7 +98,7 @@ def setup_periodic_tasks(sender, **kwargs):
# Update data required for release report. Executes Saturday evenings.
sender.add_periodic_task(
crontab(day_of_week="sat", hour=20, minute=3),
app.signature("libraries.tasks.release_tasks", generate_report=True),
app.signature("libraries.tasks.release_tasks", generate_report=False),
)
# Update users' profile photos from GitHub. Executes daily at 3:30 AM.

View File

@@ -427,6 +427,7 @@ if LOCAL_DEVELOPMENT:
]
ACCOUNT_DEFAULT_HTTP_PROTOCOL = "http"
if not LOCAL_DEVELOPMENT:
ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
SECURE_PROXY_SSL_HEADER = (

View File

@@ -6,9 +6,9 @@
1. Ask Sam for a copy of the "subscribe" data.
2. In the Django admin interface go to "Subscription datas" under "MAILING_LIST".
3. At the top of the page click on the "IMPORT 'SUBSCRIBE' DATA" button.
2. To update the mailing list counts, if you haven't already run the "DO IT ALL" button:
1. Go to "Versions" under "VERSIONS" in the admin interface
2. At the top of the page click on the "DO IT ALL" button.
2. To update the mailing list counts, if you haven't already run the "GET RELEASE REPORT DATA" button:
1. Go to "Release Reports" under "VERSIONS" in the admin interface
2. At the top of the page click on the "GET RELEASE REPORT DATA" button.
## Report Creation

View File

@@ -1,5 +1,7 @@
from django.contrib import admin
from django.core.files.storage import default_storage
import structlog
from django.conf import settings
from django.contrib import admin, messages
from django.core.exceptions import ValidationError
from django.db import transaction
from django.db.models import F, Count, OuterRef, Window
from django.db.models.functions import RowNumber
@@ -42,6 +44,9 @@ from .tasks import (
from .utils import generate_release_report_filename
logger = structlog.get_logger()
@admin.register(Commit)
class CommitAdmin(admin.ModelAdmin):
list_display = ["library_version", "sha", "author"]
@@ -183,13 +188,39 @@ class ReleaseReportView(TemplateView):
return context
def generate_report(self):
uri = f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{self.request.get_host()}"
generate_release_report.delay(
user_id=self.request.user.id, params=self.request.GET
user_id=self.request.user.id,
params=self.request.GET,
base_uri=uri,
)
def locked_publish_check(self):
    """Guard against publishing over a locked report.

    Raises ValueError when the form requests publishing and a locked,
    published report already exists for the chosen configuration.
    """
    form = self.get_form()
    form.is_valid()
    publish = form.cleaned_data["publish"]
    report_configuration = form.cleaned_data["report_configuration"]
    if not publish:
        return
    if ReleaseReport.latest_published_locked(report_configuration):
        raise ValueError(
            f"A release report already exists with locked status for "
            f"{report_configuration.display_name}. Delete or unlock the most "
            f"recent report."
        )
def get(self, request, *args, **kwargs):
form = self.get_form()
if form.is_valid():
try:
self.locked_publish_check()
except ValueError as e:
messages.error(request, str(e))
return TemplateResponse(
request,
self.form_template,
self.get_context_data(),
)
if form.cleaned_data["no_cache"]:
params = request.GET.copy()
form.cache_clear()
@@ -458,28 +489,92 @@ class ReleaseReportAdminForm(forms.ModelForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.instance.pk and not self.instance.published:
file_name = generate_release_report_filename(
self.instance.report_configuration.get_slug()
if not self.is_publish_editable():
# we require users to intentionally manually delete existing reports
self.fields["published"].disabled = True
self.fields["published"].help_text = (
"⚠️ A published PDF already exists for this Report Configuration. See "
'"Publishing" notes at the top of this page.'
)
published_filename = f"{ReleaseReport.upload_dir}{file_name}"
if default_storage.exists(published_filename):
# we require users to intentionally manually delete existing reports
self.fields["published"].disabled = True
self.fields["published"].help_text = (
f"⚠️ A published '{file_name}' already exists. To prevent accidents "
"you must manually delete that file before publishing this report."
def is_publish_editable(self) -> bool:
    """Return True when the "published" flag may be edited for this report.

    Both the currently published and previously published rows matter here,
    because published files use a deterministic filename and a stale row
    pointing at the same file is a collision risk.
    """
    # Already-published reports remain editable (e.g. to unpublish).
    if self.instance.published:
        return True
    target_filename = generate_release_report_filename(
        version_slug=self.instance.report_configuration.get_slug(),
        published_format=True,
    )
    claimants = ReleaseReport.objects.filter(
        report_configuration=self.instance.report_configuration,
        file=f"{ReleaseReport.upload_dir}{target_filename}",
    )
    # Editable when nothing else claims the published filename, or when this
    # instance is the most recent claimant.
    if claimants.count() == 0:
        return True
    return claimants.latest("created_at") == self.instance
def clean(self):
    """Validate publish state for the form.

    Raises ValidationError when the file is not publishable, or when a
    locked published report already exists for the same configuration.
    """
    cleaned_data = super().clean()
    if not self.is_publish_editable():
        raise ValidationError("This file is not publishable.")
    if cleaned_data.get("published"):
        report_configuration = cleaned_data.get("report_configuration")
        locked_exists = ReleaseReport.latest_published_locked(
            report_configuration, self.instance
        )
        if locked_exists:
            raise ValidationError(
                f"A release report already exists with locked status for "
                f"{report_configuration.display_name}. Delete or unlock the most "
                f"recent report."
            )
    return cleaned_data
@admin.register(ReleaseReport)
class ReleaseReportAdmin(admin.ModelAdmin):
form = ReleaseReportAdminForm
list_display = ["__str__", "created_at", "published", "published_at"]
list_filter = ["published", ReportConfigurationFilter, StaffUserCreatedByFilter]
list_display = ["__str__", "created_at", "published", "published_at", "locked"]
list_filter = [
"published",
"locked",
ReportConfigurationFilter,
StaffUserCreatedByFilter,
]
search_fields = ["file"]
readonly_fields = ["created_at", "created_by"]
ordering = ["-created_at"]
change_list_template = "admin/releasereport_change_list.html"
change_form_template = "admin/releasereport_change_form.html"
def get_urls(self):
    """Prepend the custom release_tasks endpoint to the stock admin URLs."""
    custom_urls = [
        path(
            "release_tasks/",
            self.admin_site.admin_view(self.release_tasks),
            name="release_tasks",
        ),
    ]
    return custom_urls + super().get_urls()
def release_tasks(self, request):
    """Kick off the asynchronous release_tasks job, then return to the list view."""
    # Imported locally to avoid an import cycle between admin and tasks modules
    # — presumably; TODO confirm against module layout.
    from libraries.tasks import release_tasks

    base_uri = f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{request.get_host()}"
    release_tasks.delay(
        base_uri=base_uri,
        user_id=request.user.id,
        generate_report=False,
    )
    self.message_user(
        request,
        "release_tasks has started, you will receive an email when the task finishes.",  # noqa: E501
    )
    return HttpResponseRedirect("../")
def has_add_permission(self, request):
# ReleaseReport rows are produced by the report-generation workflow;
# manual "Add" in the admin is disabled.
return False
@@ -488,3 +583,25 @@ class ReleaseReportAdmin(admin.ModelAdmin):
if not change:
obj.created_by = request.user
super().save_model(request, obj, form, change)
@staticmethod
def clear_other_report_files(release_report: ReleaseReport):
    """Detach the file reference when another report shares the same file.

    Clearing the reference before deletion prevents the shared file from
    being removed while other rows still link to it.
    """
    if not release_report.file:
        return
    shared_by_others = ReleaseReport.objects.filter(
        file=release_report.file.name
    ).exclude(pk=release_report.pk)
    if shared_by_others.exists():
        release_report.file = None
        release_report.save()
def delete_model(self, request, obj):
    """Delete one report, first detaching any file shared with other reports."""
    self.clear_other_report_files(obj)
    super().delete_model(request, obj)
def delete_queryset(self, request, queryset):
    """Bulk delete reports; clear shared file references so linked files survive."""
    for report in queryset:
        self.clear_other_report_files(report)
    super().delete_queryset(request, queryset)

View File

@@ -367,3 +367,5 @@ MASTER_RELEASE_URL_PATH_STR = "master"
VERSION_SLUG_PREFIX = "boost-"
RELEASE_REPORT_SEARCH_TOP_COUNTRIES_LIMIT = 5
DOCKER_CONTAINER_URL_WEB = "http://web:8000"
RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD = 6

View File

@@ -1,41 +1,50 @@
from structlog import get_logger
from functools import cached_property
from itertools import groupby, chain
from operator import attrgetter
from dataclasses import dataclass, field
from datetime import date, timedelta
from datetime import date
from django import forms
from django.template.loader import render_to_string
from django.db.models import F, Q, Count, OuterRef, Sum, When, Value, Case
from django.db.models import Q, Count, Sum
from django.forms import Form, ModelChoiceField, ModelForm, BooleanField
from algoliasearch.analytics.client import AnalyticsClientSync
from config import settings
from core.models import RenderedContent
from reports.generation import (
generate_wordcloud,
get_mailing_list_post_stats,
get_new_subscribers_stats,
generate_mailinglist_words,
generate_algolia_words,
get_algolia_search_stats,
get_git_graph_data,
get_library_data,
get_library_full_counts,
get_libraries_by_name,
get_top_contributors_for_version,
get_top_libraries,
get_top_libraries_for_version,
lines_changes_count,
get_commit_counts,
get_issues_counts,
get_download_links,
determine_versions,
get_libraries,
get_libraries_for_index,
get_mailinglist_counts,
get_slack_channels,
get_slack_stats,
)
from slack.models import Channel, SlackActivityBucket, SlackUser
from versions.exceptions import BoostImportedDataException
from versions.models import Version, ReportConfiguration
from .models import (
Commit,
CommitAuthor,
Issue,
Library,
LibraryVersion,
CommitAuthorEmail,
)
from libraries.constants import SUB_LIBRARIES
from libraries.constants import SUB_LIBRARIES, RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD
from mailing_list.models import EmailData
from .utils import batched, conditional_batched
from .tasks import (
count_mailinglist_contributors,
generate_search_cloud,
generate_mailinglist_cloud,
get_mailing_list_stats,
get_new_subscribers_stats,
count_commit_contributors_totals,
get_new_contributors_count,
)
from .utils import conditional_batched
logger = get_logger(__name__)
@@ -111,12 +120,7 @@ class CreateReportFullForm(Form):
lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries)
return f"full-report-{lib_string}"
def _get_top_libraries(self):
return self.library_queryset.annotate(
commit_count=Count("library_version__commit")
).order_by("-commit_count")[:5]
def _get_library_order(self, top_libraries):
def _get_library_order(self, top_libraries) -> list[int]:
library_order = [
x.id
for x in [
@@ -135,16 +139,6 @@ class CreateReportFullForm(Form):
library_order = [x.id for x in top_libraries]
return library_order
def _get_library_full_counts(self, libraries, library_order):
return sorted(
list(
libraries.annotate(
commit_count=Count("library_version__commit")
).values("commit_count", "id")
),
key=lambda x: library_order.index(x["id"]),
)
def _get_top_contributors_overall(self):
return (
CommitAuthor.objects.all()
@@ -177,7 +171,7 @@ class CreateReportFullForm(Form):
library_version__library__in=self.library_queryset
).count()
top_libraries = self._get_top_libraries()
top_libraries = get_top_libraries()
library_order = self._get_library_order(top_libraries)
libraries = Library.objects.filter(id__in=library_order)
library_data = [
@@ -188,7 +182,7 @@ class CreateReportFullForm(Form):
}
for x in zip(
sorted(list(libraries), key=lambda x: library_order.index(x.id)),
self._get_library_full_counts(libraries, library_order),
get_library_full_counts(libraries, library_order),
self._get_top_contributors_for_library(library_order),
)
]
@@ -250,13 +244,25 @@ class CreateReportForm(CreateReportFullForm):
"""Form for creating a report for a specific release."""
html_template_name = "admin/release_report_detail.html"
report_configuration = ModelChoiceField(
queryset=ReportConfiguration.objects.order_by("-version")
)
# queryset will be set in __init__
report_configuration = ModelChoiceField(queryset=ReportConfiguration.objects.none())
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# we want to allow master, develop, the latest release, the latest beta, along
# with any report configuration matching no Version, exclude all others.
exclusion_versions = []
if betas := Version.objects.filter(beta=True).order_by("-release_date")[1:]:
exclusion_versions += betas
if older_releases := Version.objects.filter(
active=True, full_release=True
).order_by("-release_date")[1:]:
exclusion_versions += older_releases
qs = ReportConfiguration.objects.exclude(
version__in=[v.name for v in exclusion_versions]
).order_by("-version")
self.fields["report_configuration"].queryset = qs
self.fields["library_1"].help_text = (
"If none are selected, all libraries will be selected."
)
@@ -277,592 +283,92 @@ class CreateReportForm(CreateReportFullForm):
report_configuration = self.cleaned_data["report_configuration"]
return f"release-report-{lib_string}-{report_configuration.version}"
def _get_top_contributors_for_version(self, version):
return (
CommitAuthor.objects.filter(commit__library_version__version=version)
.annotate(
commit_count=Count(
"commit",
filter=Q(
commit__library_version__library__in=self.library_queryset
),
)
)
.order_by("-commit_count")[:10]
)
def _get_library_queryset_by_version(
self, version: Version, annotate_commit_count=False
):
qs = self.library_queryset.none()
if version:
qs = self.library_queryset.filter(
library_version=LibraryVersion.objects.filter(
library=OuterRef("id"), version=version
)[:1],
)
if annotate_commit_count:
qs = qs.annotate(commit_count=Count("library_version__commit"))
return qs
def _get_top_libraries_for_version(self, version):
library_qs = self._get_library_queryset_by_version(
version, annotate_commit_count=True
)
return library_qs.order_by("-commit_count")
def _get_libraries_by_name(self, version):
library_qs = self._get_library_queryset_by_version(
version, annotate_commit_count=True
)
return library_qs.order_by("name")
def _get_libraries_by_quality(self, version):
# returns "great", "good", and "standard" libraries in that order
library_qs = self._get_library_queryset_by_version(version)
return list(
chain(
library_qs.filter(graphic__isnull=False),
library_qs.filter(graphic__isnull=True, is_good=True),
library_qs.filter(graphic__isnull=True, is_good=False),
)
)
def _get_library_version_counts(self, library_order, version):
library_qs = self._get_library_queryset_by_version(
version, annotate_commit_count=True
)
return sorted(
list(library_qs.values("commit_count", "id")),
key=lambda x: library_order.index(x["id"]),
)
def _global_new_contributors(self, version):
version_lt = list(
Version.objects.minor_versions()
.filter(version_array__lt=version.cleaned_version_parts_int)
.order_by("id")
.values_list("id", flat=True)
)
prior_version_author_ids = (
CommitAuthor.objects.filter(commit__library_version__version__in=version_lt)
.distinct()
.values_list("id", flat=True)
)
version_author_ids = (
CommitAuthor.objects.filter(
commit__library_version__version__in=version_lt + [version.id]
)
.distinct()
.values_list("id", flat=True)
)
return set(version_author_ids) - set(prior_version_author_ids)
def _count_new_contributors(self, libraries, library_order, version):
version_lt = list(
Version.objects.minor_versions()
.filter(version_array__lt=version.cleaned_version_parts_int)
.values_list("id", flat=True)
)
version_lte = version_lt + [version.id]
lt_subquery = LibraryVersion.objects.filter(
version__in=version_lt,
library=OuterRef("id"),
).values("id")
lte_subquery = LibraryVersion.objects.filter(
version__in=version_lte,
library=OuterRef("id"),
).values("id")
return sorted(
list(
libraries.annotate(
authors_before_release_count=Count(
"library_version__commit__author",
filter=Q(library_version__in=lt_subquery),
distinct=True,
),
authors_through_release_count=Count(
"library_version__commit__author",
filter=Q(library_version__in=lte_subquery),
distinct=True,
),
)
.annotate(
count=F("authors_through_release_count")
- F("authors_before_release_count")
)
.values("id", "count")
),
key=lambda x: library_order.index(x["id"]),
)
def _count_issues(self, libraries, library_order, version, prior_version):
    """Per-library opened/closed issue counts, aligned with library_order.

    Libraries with no issue activity get a zeroed placeholder dict.
    """
    counts_by_library = {
        row["library_id"]: row
        for row in Issue.objects.count_opened_closed_during_release(
            version, prior_version
        ).filter(library_id__in=[lib.id for lib in libraries])
    }
    return [
        counts_by_library.get(
            lib_id, {"opened": 0, "closed": 0, "library_id": lib_id}
        )
        for lib_id in library_order
    ]
def _count_commit_contributors_totals(self, version, prior_version):
"""Get a count of contributors for this release, and a count of
new contributors.
"""
version_lt = list(
Version.objects.minor_versions()
.filter(version_array__lte=prior_version.cleaned_version_parts_int)
.values_list("id", flat=True)
)
version_lte = version_lt + [version.id]
lt_subquery = LibraryVersion.objects.filter(
version__in=version_lt,
library=OuterRef("id"),
).values("id")
lte_subquery = LibraryVersion.objects.filter(
version__in=version_lte,
library=OuterRef("id"),
).values("id")
qs = self.library_queryset.aggregate(
this_release_count=Count(
"library_version__commit__author",
filter=Q(library_version__version=version),
distinct=True,
),
authors_before_release_count=Count(
"library_version__commit__author",
filter=Q(library_version__in=lt_subquery),
distinct=True,
),
authors_through_release_count=Count(
"library_version__commit__author",
filter=Q(library_version__in=lte_subquery),
distinct=True,
),
)
new_count = (
qs["authors_through_release_count"] - qs["authors_before_release_count"]
)
this_release_count = qs["this_release_count"]
return this_release_count, new_count
def _get_top_contributors_for_library_version(self, library_order, version):
top_contributors_release = []
for library_id in library_order:
top_contributors_release.append(
CommitAuthor.objects.filter(
commit__library_version=LibraryVersion.objects.get(
version=version, library_id=library_id
)
)
.annotate(commit_count=Count("commit"))
.order_by("-commit_count")[:10]
)
return top_contributors_release
def _count_mailinglist_contributors(self, version, prior_version):
version_lt = list(
Version.objects.minor_versions()
.filter(version_array__lte=prior_version.cleaned_version_parts_int)
.values_list("id", flat=True)
)
version_lte = version_lt + [version.id]
current = (
EmailData.objects.filter(version__in=version_lte)
.distinct("author_id")
.count()
)
prior = (
EmailData.objects.filter(version__in=version_lt)
.distinct("author_id")
.count()
)
release = EmailData.objects.filter(version=version).count()
return release, current - prior
def _get_library_versions(self, library_order, version):
    """LibraryVersion rows for this release, sorted to match library_order."""
    rows = LibraryVersion.objects.filter(
        version=version, library_id__in=library_order
    )
    return sorted(rows, key=lambda lv: library_order.index(lv.library_id))
def _get_git_graph_data(self, prior_version: Version | None, version: Version):
"""Fetch commit count data for a release and return an instance of Graph.
Returns data in a format to easily create a github style green box commit graph.
"""
if prior_version is None:
return None
@dataclass
class Day:
date: date
count: int
color: str = ""
@dataclass
class Week:
days: list[Day] = field(default_factory=list)
@cached_property
def max(self):
"""The max number of commits this week."""
return max(x.count for x in self.days)
@dataclass
class Graph:
weeks: list[Week] = field(default_factory=list)
colors: list[str] = field(
default_factory=lambda: [
"#E8F5E9",
"#C8E6C9",
"#A5D6A7",
"#81C784",
"#66BB6A",
"#4CAF50",
"#43A047",
"#388E3C",
"#2E7D32",
"#1B5E20",
],
)
@cached_property
def graph_start(self):
return start.strftime("%B '%y")
@cached_property
def graph_end(self):
return end.strftime("%B '%y")
@cached_property
def max(self):
"""The max number of commits in all weeks."""
return max(x.max for x in self.weeks)
def append_day(self, day: Day):
"""Append a day into the last week of self.weeks.
- Automatically create a new week if there are already 7 days in the
last week.
"""
if len(self.weeks) == 0 or len(self.weeks[-1].days) == 7:
self.weeks.append(Week())
self.weeks[-1].days.append(day)
def apply_colors(self):
"""Iterate through each day and apply a color.
- The color is selected based on the number of commits made on
that day, relative to the highest number of commits in all days in
Graph.weeks.days.
"""
if not (high := self.max):
# No commits this release
# TODO: we may want a more elegant solution
# than just not graphing this library
return
for week in self.weeks:
for day in week.days:
decimal = day.count / high
if decimal == 1:
day.color = self.colors[-1]
else:
idx = int(decimal * len(self.colors))
day.color = self.colors[idx]
count_query = (
Commit.objects.filter(library_version__version=version)
.values("committed_at__date")
.annotate(count=Count("id"))
)
counts_by_date = {x["committed_at__date"]: x["count"] for x in count_query}
graph = Graph()
# The start date is the release date of the previous version
# The end date is one day before the release date of the current version
start: date = prior_version.release_date
end: date = (version.release_date or date.today()) - timedelta(days=1)
# if the release started on a Thursday, we want to add Sun -> Wed to the data
# with empty counts, even if they aren't part of the release.
for i in range(start.weekday(), 0, -1):
day = Day(date=start - timedelta(days=i), count=0)
graph.append_day(day)
current_date = start
while current_date <= end:
day = Day(date=current_date, count=counts_by_date.get(current_date, 0))
graph.append_day(day)
current_date = current_date + timedelta(days=1)
graph.apply_colors()
return graph
def _get_slack_stats(self, prior_version, version):
    """Return per-channel slack stats, busiest channel first.

    Channels with no active users are omitted.
    """
    stats = []
    for channel in Channel.objects.filter(name__istartswith="boost"):
        channel_stat = self._get_slack_stats_for_channels(
            prior_version, version, channels=[channel]
        )
        channel_stat["channel"] = channel
        if channel_stat["user_count"] > 0:
            stats.append(channel_stat)
    # Sort descending by total message count, treating None totals as 0.
    stats.sort(key=lambda s: -(s["total"] or 0))
    return stats
def _get_slack_stats_for_channels(
self, prior_version, version, channels: list[Channel] | None = None
):
"""Get slack stats for specific channels, or all channels."""
start = prior_version.release_date
end = date.today()
if version.release_date:
end = version.release_date - timedelta(days=1)
# count of all messages in the date range
q = Q(day__range=[start, end])
if channels:
q &= Q(channel__in=channels)
total = SlackActivityBucket.objects.filter(q).aggregate(total=Sum("count"))[
"total"
]
# message counts per user in the date range
q = Q(slackactivitybucket__day__range=[start, end])
if channels:
q &= Q(slackactivitybucket__channel__in=channels)
per_user = (
SlackUser.objects.annotate(
total=Sum(
"slackactivitybucket__count",
filter=q,
)
)
.filter(total__gt=0)
.order_by("-total")
)
q = Q()
if channels:
q &= Q(channel__in=channels)
distinct_users = (
SlackActivityBucket.objects.filter(q)
.order_by("user_id")
.distinct("user_id")
)
new_user_count = (
distinct_users.filter(day__lte=end).count()
- distinct_users.filter(day__lt=start).count()
)
return {
"users": per_user[:10],
"user_count": per_user.count(),
"total": total,
"new_user_count": new_user_count,
}
def _get_dependency_data(self, library_order, version):
    """Dependency diffs per library, aligned with library_order.

    Libraries without diff data get an empty dict; a failed import lookup
    downgrades to empty data with a warning rather than failing the report.
    """
    try:
        dependency_diff_values = version.get_dependency_diffs().values()
    except BoostImportedDataException as e:
        logger.warning(f"Could not get dependency diffs for version {version}: {e}")
        dependency_diff_values = {}
    diffs_by_id = {d["library_id"]: d for d in dependency_diff_values}
    return [diffs_by_id.get(lib_id, {}) for lib_id in library_order]
def get_library_data(self, libraries, library_order, prior_version, version):
library_data = [
{
"library": item[0],
"full_count": item[1],
"version_count": item[2],
"top_contributors_release": item[3],
"new_contributors_count": item[4],
"issues": item[5],
"library_version": item[6],
"deps": item[7],
}
for item in zip(
libraries,
self._get_library_full_counts(libraries, library_order),
self._get_library_version_counts(library_order, version),
self._get_top_contributors_for_library_version(library_order, version),
self._count_new_contributors(libraries, library_order, version),
self._count_issues(libraries, library_order, version, prior_version),
self._get_library_versions(library_order, version),
self._get_dependency_data(library_order, version),
)
]
return [x for x in library_data if x["version_count"]["commit_count"] > 0]
def get_stats(self):
report_configuration = self.cleaned_data["report_configuration"]
version = Version.objects.filter(name=report_configuration.version).first()
committee_members = report_configuration.financial_committee_members.all()
# NOTE TO FUTURE DEVS: remember to account for the fact that a report
# configuration may not match with a real version in frequent cases where
# reports are generated before the release version has been created.
report_before_release = False if version else True
(report_before_release, prior_version, version) = determine_versions(
report_configuration.version
)
prior_version = None
if report_before_release:
# if the version is not set then the user has chosen a report configuration
# that's not matching a live version, so we use the most recent version
version = Version.objects.filter(name="master").first()
prior_version = Version.objects.most_recent()
# trigger tasks first to run in parallel
mailing_list_contributors_task = count_mailinglist_contributors.delay(
prior_version.pk, version.pk
)
mailing_list_stats_task = get_mailing_list_stats.delay(
prior_version.pk, version.pk
)
commit_contributors_task = count_commit_contributors_totals.delay(
version.pk, prior_version.pk
)
new_subscribers_stats_task = get_new_subscribers_stats.delay(
prior_version.release_date, version.release_date or date.today()
)
mailinglist_wordcloud_task = generate_mailinglist_cloud.delay(
prior_version.pk, version.pk
)
# if the report is based on a live version, look for stats for that
# version, otherwise use the stats for the prior (live) version
search_wordcloud_task = generate_search_cloud.delay(
prior_version.pk if report_before_release else version.pk
)
new_contributors_count_task = get_new_contributors_count.delay(version.pk)
# end of task triggering
downloads = {
k: list(v)
for k, v in groupby(
version.downloads.all().order_by("operating_system"),
key=attrgetter("operating_system"),
)
}
if not prior_version:
prior_version = (
Version.objects.minor_versions()
.filter(version_array__lt=version.cleaned_version_parts_int)
.order_by("-version_array")
.first()
)
commit_count = Commit.objects.filter(
library_version__version__name__lte=version.name,
library_version__library__in=self.library_queryset,
).count()
version_commit_count = Commit.objects.filter(
library_version__version=version,
library_version__library__in=self.library_queryset,
).count()
top_libraries_for_version = self._get_top_libraries_for_version(version)
top_libraries_by_name = self._get_libraries_by_name(version)
commit_count, version_commit_count = get_commit_counts(version)
top_libraries_for_version = get_top_libraries_for_version(version)
top_libraries_by_name = get_libraries_by_name(version)
library_order = self._get_library_order(top_libraries_by_name)
libraries = Library.objects.filter(id__in=library_order).order_by(
Case(
*[When(id=pk, then=Value(pos)) for pos, pk in enumerate(library_order)]
)
)
library_data = self.get_library_data(
libraries, library_order, prior_version, version
)
AUTHORS_PER_PAGE_THRESHOLD = 6
batched_library_data = conditional_batched(
library_data,
2,
lambda x: x.get("top_contributors_release").count()
<= AUTHORS_PER_PAGE_THRESHOLD,
)
new_libraries = libraries.exclude(
library_version__version__release_date__lte=prior_version.release_date
).prefetch_related("authors")
# TODO: we may in future need to find a way to show the removed libraries, for
# now it's not needed. In that case the distinction between running this on a
# ReportConfiguration with a real 'version' entry vs one that instead uses 'master'
# will need to be considered
top_contributors = self._get_top_contributors_for_version(version)
# total messages sent during this release (version)
total_mailinglist_count = EmailData.objects.filter(version=version).aggregate(
total=Sum("count")
)["total"]
mailinglist_counts = (
EmailData.objects.filter(version=version)
.with_total_counts()
.order_by("-total_count")[:10]
libraries = get_libraries(library_order)
new_libraries = libraries.exclude(
library_version__version__release_date__lte=prior_version.release_date
).prefetch_related("authors")
top_contributors = get_top_contributors_for_version(version)
mailinglist_counts = get_mailinglist_counts(version)
lines_added, lines_removed = lines_changes_count(version)
opened_issues_count, closed_issues_count = get_issues_counts(
prior_version, version
)
(
mailinglist_contributor_release_count,
mailinglist_contributor_new_count,
) = self._count_mailinglist_contributors(version, prior_version)
(
commit_contributors_release_count,
commit_contributors_new_count,
) = self._count_commit_contributors_totals(version, prior_version)
added_library_count = new_libraries.count()
# TODO: connected to above todo, add removed_libraries.count()
removed_library_count = 0
lines_added = LibraryVersion.objects.filter(
version=version,
library__in=self.library_queryset,
).aggregate(lines=Sum("insertions"))["lines"]
library_data = get_library_data(library_order, prior_version.pk, version.pk)
slack_stats = get_slack_stats(prior_version, version)
lines_removed = LibraryVersion.objects.filter(
version=version,
library__in=self.library_queryset,
).aggregate(lines=Sum("deletions"))["lines"]
# we want 2 channels per pdf page, use batched to get groups of 2
slack_stats = batched(self._get_slack_stats(prior_version, version), 2)
slack_channels = batched(
Channel.objects.filter(name__istartswith="boost").order_by("name"), 10
library_index_library_data = get_libraries_for_index(library_data, version)
batched_library_data = conditional_batched(
library_data,
2,
lambda x: x.get("top_contributors_release").count()
<= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD,
)
committee_members = report_configuration.financial_committee_members.all()
mailinglist_post_stats = get_mailing_list_post_stats(
prior_version.release_date, version.release_date or date.today()
git_graph_data = get_git_graph_data(prior_version, version)
download = get_download_links(version)
### completed task handling ###
(mailinglist_contributor_release_count, mailinglist_contributor_new_count) = (
mailing_list_contributors_task.get()
)
new_subscribers_stats = get_new_subscribers_stats(
prior_version.release_date, version.release_date or date.today()
(mailinglist_post_stats, total_mailinglist_count) = (
mailing_list_stats_task.get()
)
library_index_library_data = []
for library in self._get_libraries_by_quality(version):
library_index_library_data.append(
(
library,
library in [lib["library"] for lib in library_data],
)
)
# mailinglist word cloud generation
mailinglist_words = generate_mailinglist_words(prior_version, version)
mailinglist_wordcloud_base64, mailinglist_wordcloud_top_words = (
generate_wordcloud(mailinglist_words, width=1400, height=700)
(commit_contributors_release_count, commit_contributors_new_count) = (
commit_contributors_task.get()
)
# algolia search word cloud generation
client = AnalyticsClientSync(**settings.ALGOLIA)
# if the report is based on a live version, look for stats for that
# version, otherwise use the stats for the prior (live) version
search_version = prior_version if report_before_release else version
search_list_words = generate_algolia_words(client, search_version)
search_wordcloud_base64, search_wordcloud_top_words = generate_wordcloud(
search_list_words, width=800, height=250
)
search_stats = get_algolia_search_stats(client, search_version)
logger.info(f"{search_stats=}")
opened_issues_count = (
Issue.objects.filter(library__in=self.library_queryset)
.opened_during_release(version, prior_version)
.count()
)
closed_issues_count = (
Issue.objects.filter(library__in=self.library_queryset)
.closed_during_release(version, prior_version)
.count()
(
mailinglist_words,
mailinglist_wordcloud_base64,
mailinglist_wordcloud_top_words,
) = mailinglist_wordcloud_task.get()
(search_wordcloud_base64, search_wordcloud_top_words, search_stats) = (
search_wordcloud_task.get()
)
global_contributors_new_count = new_contributors_count_task.get()
return {
"committee_members": committee_members,
@@ -880,16 +386,14 @@ class CreateReportForm(CreateReportFullForm):
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
"mailinglist_post_stats": mailinglist_post_stats,
"mailinglist_new_subscribers_stats": new_subscribers_stats,
"mailinglist_new_subscribers_stats": new_subscribers_stats_task.get(),
"mailinglist_charts_start_year": prior_version.release_date.year,
"search_wordcloud_base64": search_wordcloud_base64,
"search_wordcloud_frequencies": search_wordcloud_top_words,
"search_stats": search_stats,
"commit_contributors_release_count": commit_contributors_release_count,
"commit_contributors_new_count": commit_contributors_new_count,
"global_contributors_new_count": len(
self._global_new_contributors(version)
),
"global_contributors_new_count": global_contributors_new_count,
"commit_count": commit_count,
"version_commit_count": version_commit_count,
"top_contributors_release_overall": top_contributors,
@@ -899,11 +403,11 @@ class CreateReportForm(CreateReportFullForm):
"top_libraries_for_version": top_libraries_for_version,
"library_count": libraries.count(),
"library_index_libraries": library_index_library_data,
"added_library_count": added_library_count,
"added_library_count": new_libraries.count(),
"removed_library_count": removed_library_count,
"downloads": downloads,
"contribution_box_graph": self._get_git_graph_data(prior_version, version),
"slack_channels": slack_channels,
"downloads": download,
"contribution_box_graph": git_graph_data,
"slack_channels": get_slack_channels(),
"slack": slack_stats,
}

View File

@@ -0,0 +1,21 @@
# Generated by Django 5.2.7 on 2025-11-11 22:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``locked`` flag to ReleaseReport.

    A locked published report blocks task-based (automatic) re-publishing
    for its report configuration.
    """

    dependencies = [
        ("libraries", "0035_releasereport"),
    ]

    operations = [
        migrations.AddField(
            model_name="releasereport",
            name="locked",
            field=models.BooleanField(
                default=False,
                help_text="Can't be overwritten during release report publish. Blocks task-based publishing.",
            ),
        ),
    ]

View File

@@ -1,3 +1,4 @@
import os
import re
import uuid
from datetime import timedelta
@@ -565,6 +566,10 @@ class ReleaseReport(models.Model):
published = models.BooleanField(default=False)
published_at = models.DateTimeField(blank=True, null=True)
locked = models.BooleanField(
default=False,
help_text="Can't be overwritten during release report publish. Blocks task-based publishing.",
)
def __str__(self):
    """Return the file name with the upload directory prefix removed."""
    # The former f-string wrapper was redundant (replace() already returns
    # str) and its nested same-quote quoting requires Python 3.12+ — the
    # same pattern fixed in generate_release_report_filename.
    return self.file.name.replace(self.upload_dir, "")
@@ -589,17 +594,69 @@ class ReleaseReport(models.Model):
default_storage.delete(current_name)
self.file.name = final_filename
def save(self, allow_overwrite=False, *args, **kwargs):
super().save(*args, **kwargs)
def get_media_file(self):
    """Return the public URL path of this report's file under MEDIA_URL.

    URL paths always use "/" as the separator; ``os.sep`` is the local
    filesystem separator and would produce backslashes on Windows,
    breaking the URL.
    """
    return "/".join(
        [
            settings.MEDIA_URL.rstrip("/"),
            self.file.name,
        ]
    )
@staticmethod
def latest_published_locked(
    report_configuration: ReportConfiguration,
    release_report_exclusion=None,
) -> bool:
    """Return the ``locked`` flag of the first published report for the
    configuration's version, or False when none exists.

    Args:
        report_configuration: configuration whose version's reports to check.
        release_report_exclusion: optional report to exclude from the check
            (e.g. the report currently being saved).
    """
    release_reports_qs = ReleaseReport.objects.filter(
        report_configuration__version=report_configuration.version,
        published=True,
    )
    if release_report_exclusion:
        release_reports_qs = release_reports_qs.exclude(
            pk=release_report_exclusion.id
        )
    # Fetch a single row; the original evaluated the whole queryset just
    # to test truthiness before calling .first().
    latest = release_reports_qs.first()
    return latest.locked if latest else False
def unpublish_previous_reports(self):
    """Mark every other published report for this version as unpublished."""
    previous_reports = ReleaseReport.objects.filter(
        report_configuration__version=self.report_configuration.version,
        published=True,
    ).exclude(pk=self.id)
    # Save each instance individually so the model's custom save() runs.
    for report in previous_reports:
        report.published = False
        report.save()
def save(self, allow_published_overwrite=False, *args, **kwargs):
    """Save the report, handling first-time publication.

    A report is "being published" when ``published`` is True but
    ``published_at`` has not been stamped yet. In that case the file is
    renamed to its published name, any other published reports for the same
    version are unpublished, and ``published_at`` is set.

    Args:
        allow_published_overwrite (bool): If True, allows overwriting of published
            reports (locked checks still apply)
        *args: Additional positional arguments passed to the superclass save method
        **kwargs: Additional keyword arguments passed to the superclass save method
    Raises:
        ValueError: Raised if there is an existing locked release report for the
            configuration, preventing publication of another one without
            resolving the conflict.
    """
    is_being_published = self.published and not self.published_at
    if not is_being_published:
        super().save(*args, **kwargs)
    # NOTE(review): when publishing with no file attached, neither branch
    # persists the instance — confirm that is intended.
    if is_being_published and self.file:
        if ReleaseReport.latest_published_locked(self.report_configuration, self):
            msg = (
                f"A release report already exists with locked status for "
                f"{self.report_configuration.display_name}. Delete or unlock the "
                f"most recent report."
            )
            raise ValueError(msg)
        self.unpublish_previous_reports()
        new_filename = generate_release_report_filename(
            self.report_configuration.get_slug(), self.published
        )
        self.rename_file_to(new_filename, allow_published_overwrite)
        self.published_at = timezone.now()
        super().save()
# Signal handler to delete files when ReleaseReport is deleted

View File

@@ -1,14 +1,16 @@
from datetime import date, timedelta
from celery import shared_task, chain
from django.core.mail import EmailMultiAlternatives
from django.core.management import call_command
import structlog
from django.db.models.functions import ExtractWeek, ExtractIsoYear
from config.celery import app
from django.conf import settings
from django.db.models import Q, Count
from django.db.models import Q, Count, Sum, OuterRef
from core.boostrenderer import get_content_from_s3
from core.htmlhelper import get_library_documentation_urls
from libraries.forms import CreateReportForm, CreateReportFullForm
from libraries.github import LibraryUpdater
from libraries.models import (
Library,
@@ -17,6 +19,14 @@ from libraries.models import (
CommitAuthor,
ReleaseReport,
)
from mailing_list.models import EmailData, PostingData, SubscriptionData
from reports.generation import (
generate_algolia_words,
generate_wordcloud,
get_algolia_search_stats,
generate_mailinglist_words,
global_new_contributors,
)
from users.tasks import User
from versions.models import Version
from .constants import (
@@ -243,6 +253,8 @@ def update_issues(clean=False):
@app.task
def generate_release_report(user_id: int, params: dict, base_uri: str = None):
"""Generate a release report asynchronously and save it in RenderedContent."""
from libraries.forms import CreateReportForm
form = CreateReportForm(params)
html = form.cache_html(base_uri=base_uri)
# override the base uri to reference the internal container for local dev
@@ -304,9 +316,9 @@ def generate_release_report_pdf(
release_report.file.save(filename, ContentFile(pdf_bytes), save=True)
if publish:
release_report.published = True
release_report.save(allow_overwrite=True)
logger.info(f"{release_report_id=} updated with PDF {filename=}")
release_report.save(allow_published_overwrite=True)
except ValueError as e:
logger.error(f"Failed to publish release: {e}")
except Exception as e:
logger.error(f"Failed to generate PDF: {e}", exc_info=True)
raise
@@ -315,6 +327,8 @@ def generate_release_report_pdf(
@app.task
def generate_library_report(params):
"""Generate a library report asynchronously and save it in RenderedContent."""
from libraries.forms import CreateReportFullForm
form = CreateReportFullForm(params)
form.cache_html()
@@ -479,3 +493,182 @@ def send_commit_author_email_verify_mail(commit_author_email, url):
msg.attach_alternative(html_content, "text/html")
msg.send()
logger.info(f"Verification email to {commit_author_email} sent")
@shared_task
def count_mailinglist_contributors(prior_version_id: int, version_id: int):
    """Return (EmailData rows for this release, authors first seen this release)."""
    target = Version.objects.get(id=version_id)
    baseline = Version.objects.get(id=prior_version_id)
    # Minor-version ids up to and including the prior release.
    earlier_ids = list(
        Version.objects.minor_versions()
        .filter(version_array__lte=baseline.cleaned_version_parts_int)
        .values_list("id", flat=True)
    )
    through_ids = earlier_ids + [target.id]
    authors_through = (
        EmailData.objects.filter(version__in=through_ids)
        .distinct("author_id")
        .count()
    )
    authors_before = (
        EmailData.objects.filter(version__in=earlier_ids)
        .distinct("author_id")
        .count()
    )
    release_count = EmailData.objects.filter(version=target).count()
    return release_count, authors_through - authors_before
@shared_task
def generate_mailinglist_cloud(prior_version_id: int, version_id: int):
    """Build the mailing-list word cloud for the release window.

    Returns (word frequencies, base64 PNG, top words).
    """
    prior_version = Version.objects.get(id=prior_version_id)
    version = Version.objects.get(id=version_id)
    word_frequencies = generate_mailinglist_words(prior_version, version)
    image_base64, top_words = generate_wordcloud(
        word_frequencies, width=1400, height=700
    )
    return word_frequencies, image_base64, top_words
@shared_task
def generate_search_cloud(search_version_id: int):
    """Build the Algolia search word cloud and search stats for a version.

    Returns (base64 PNG, top words, stats dict).
    """
    search_version = Version.objects.get(id=search_version_id)
    # Imported lazily so the worker does not need the client at import time.
    from algoliasearch.analytics.client import AnalyticsClientSync

    analytics_client = AnalyticsClientSync(**settings.ALGOLIA)
    # The caller passes the live version's id when the report predates
    # the release, otherwise the release version's id.
    word_frequencies = generate_algolia_words(analytics_client, search_version)
    image_base64, top_words = generate_wordcloud(
        word_frequencies, width=800, height=250
    )
    stats = get_algolia_search_stats(analytics_client, search_version)
    return image_base64, top_words, stats
@shared_task
def get_mailing_list_stats(prior_version_id: int, version_id: int):
    """Return (weekly post chart data, total mailing-list message count)."""
    version = Version.objects.get(id=version_id)
    prior_version = Version.objects.get(id=prior_version_id)
    start_date: date = prior_version.release_date
    end_date: date = version.release_date or date.today()
    weekly_rows = (
        PostingData.objects.filter(post_time__gt=start_date, post_time__lte=end_date)
        .annotate(week=ExtractWeek("post_time"), iso_year=ExtractIsoYear("post_time"))
        .values("iso_year", "week")
        .annotate(count=Count("id"))
        .order_by("iso_year", "week")
    )
    # Label each point "<week> (<2-digit year>)", e.g. "51 (24)", "1 (25)".
    chart_data = [
        {"x": f"{row['week']} ({str(row['iso_year'])[2:]})", "y": row["count"]}
        for row in weekly_rows
    ]
    total_mailinglist_count = EmailData.objects.filter(version=version).aggregate(
        total=Sum("count")
    )["total"]
    return chart_data, total_mailinglist_count
@shared_task
def get_new_subscribers_stats(start_date: date, end_date: date):
    """Return weekly "boost" list subscription counts between the two dates.

    Weeks with no subscriptions appear with a zero count.
    """
    rows = (
        SubscriptionData.objects.filter(
            subscription_dt__gte=start_date,
            subscription_dt__lte=end_date,
            list="boost",
        )
        .annotate(
            week=ExtractWeek("subscription_dt"),
            iso_year=ExtractIsoYear("subscription_dt"),
        )
        .values("iso_year", "week")
        .annotate(count=Count("id"))
        .order_by("iso_year", "week")
    )
    # Index query results by (iso_year, iso_week) for constant-time lookup.
    counts_by_week = {(row["iso_year"], row["week"]): row["count"] for row in rows}
    chart_data = []
    emitted_weeks = set()
    cursor = start_date
    # Hop through the range a week at a time so every ISO week is covered.
    while cursor <= end_date:
        iso_year, iso_week, _ = cursor.isocalendar()
        week_key = (iso_year, iso_week)
        if week_key not in emitted_weeks:  # skip duplicate weeks
            emitted_weeks.add(week_key)
            chart_data.append(
                {
                    "x": f"{iso_week} ({str(iso_year)[2:]})",
                    "y": counts_by_week.get(week_key, 0),
                }
            )
        cursor += timedelta(days=7)
    return chart_data
@shared_task
def count_commit_contributors_totals(version_id: int, prior_version_id: int):
    """Get a count of contributors for this release, and a count of
    new contributors.

    Returns:
        tuple[int, int]: (distinct commit authors in this release,
        authors appearing for the first time in this release).
    """
    # Imported lazily to avoid a circular import with libraries.forms.
    from libraries.forms import CreateReportFullForm

    prior_version = Version.objects.get(id=prior_version_id)
    version = Version.objects.get(id=version_id)
    # Ids of all minor versions up to and including the prior release.
    version_lt = list(
        Version.objects.minor_versions()
        .filter(version_array__lte=prior_version.cleaned_version_parts_int)
        .values_list("id", flat=True)
    )
    version_lte = version_lt + [version.id]
    # NOTE(review): these subqueries use OuterRef("id") but are consumed in
    # .aggregate() below rather than .annotate() (as the parallel
    # count_new_contributors in reports.generation does) — confirm the
    # OuterRef resolves as intended in aggregate context.
    lt_subquery = LibraryVersion.objects.filter(
        version__in=version_lt,
        library=OuterRef("id"),
    ).values("id")
    lte_subquery = LibraryVersion.objects.filter(
        version__in=version_lte,
        library=OuterRef("id"),
    ).values("id")
    qs = CreateReportFullForm.library_queryset.aggregate(
        this_release_count=Count(
            "library_version__commit__author",
            filter=Q(library_version__version=version),
            distinct=True,
        ),
        authors_before_release_count=Count(
            "library_version__commit__author",
            filter=Q(library_version__in=lt_subquery),
            distinct=True,
        ),
        authors_through_release_count=Count(
            "library_version__commit__author",
            filter=Q(library_version__in=lte_subquery),
            distinct=True,
        ),
    )
    # New contributors = everyone seen through this release minus everyone
    # already seen before it.
    new_count = qs["authors_through_release_count"] - qs["authors_before_release_count"]
    this_release_count = qs["this_release_count"]
    return this_release_count, new_count
@shared_task
def get_new_contributors_count(version_id: int):
    """Count contributors whose first commits landed in this release."""
    version = Version.objects.get(id=version_id)
    new_contributors = global_new_contributors(version)
    return len(new_contributors)

View File

@@ -368,6 +368,5 @@ def generate_release_report_filename(version_slug: str, published_format: bool =
filename_data = ["release-report", version_slug]
if not published_format:
filename_data.append(datetime.now(timezone.utc).isoformat())
filename = f"{"-".join(filename_data)}.pdf"
filename = f"{'-'.join(filename_data)}.pdf"
return filename

View File

@@ -3,22 +3,35 @@ import io
import json
import logging
import random
from datetime import datetime, timedelta, date
from dataclasses import dataclass, field
from datetime import timedelta, date
from functools import cached_property
from itertools import chain, groupby
from operator import attrgetter
import psycopg2
from django.conf import settings
from django.contrib.staticfiles import finders
from django.db.models import Count
from django.db.models.functions import ExtractWeek, ExtractIsoYear
from django.db.models import OuterRef, Q, F, Case, When, Value, Sum, Count
from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from algoliasearch.analytics.client import AnalyticsClientSync
from core.models import SiteSettings
from libraries.constants import RELEASE_REPORT_SEARCH_TOP_COUNTRIES_LIMIT
from libraries.models import WordcloudMergeWord # TODO: move model to this app
from mailing_list.models import PostingData, SubscriptionData
from libraries.models import (
WordcloudMergeWord, # TODO: move model to this app
CommitAuthor,
LibraryVersion,
Issue,
Commit,
Library,
)
from libraries.utils import batched
from mailing_list.models import EmailData
from reports.constants import WORDCLOUD_FONT
from slack.models import Channel, SlackActivityBucket, SlackUser
from versions.exceptions import BoostImportedDataException
from versions.models import Version
logger = logging.getLogger(__name__)
@@ -48,9 +61,13 @@ def generate_algolia_words(
"index": version.stripped_boost_url_slug,
"limit": 100,
}
search_results = client.get_top_searches(**args).to_json()
search_data = json.loads(search_results)
return {r["search"]: r["count"] for r in search_data["searches"] if r["count"] > 1}
try:
search_results = client.get_top_searches(**args).to_json()
search_data = json.loads(search_results)
searches = search_data.get("searches") or []
return {r["search"]: r["count"] for r in searches if r["count"] > 1}
except ValueError:
return {}
def generate_wordcloud(
@@ -147,73 +164,27 @@ def get_mail_content(version: Version, prior_version: Version):
yield content
def get_mailing_list_post_stats(start_date: datetime, end_date: datetime):
data = (
PostingData.objects.filter(post_time__gt=start_date, post_time__lte=end_date)
.annotate(week=ExtractWeek("post_time"), iso_year=ExtractIsoYear("post_time"))
.values("iso_year", "week")
.annotate(count=Count("id"))
.order_by("iso_year", "week")
def get_mailinglist_counts(version: Version):
return (
EmailData.objects.filter(version=version)
.with_total_counts()
.order_by("-total_count")[:10]
)
chart_data = []
for row in data:
week_number = row["week"]
year_number = str(row["iso_year"])[2:] # e.g. 25
x = f"{week_number} ({year_number})" # e.g., "51 (24)", "1 (25)"
y = row["count"]
chart_data.append({"x": x, "y": y})
return chart_data
def get_new_subscribers_stats(start_date: datetime, end_date: datetime):
data = (
SubscriptionData.objects.filter(
subscription_dt__gte=start_date,
subscription_dt__lte=end_date,
list="boost",
)
.annotate(
week=ExtractWeek("subscription_dt"),
iso_year=ExtractIsoYear("subscription_dt"),
)
.values("iso_year", "week")
.annotate(count=Count("id"))
.order_by("iso_year", "week")
)
# Convert data into a dict for easy lookup
counts_by_week = {(row["iso_year"], row["week"]): row["count"] for row in data}
# Iterate through every ISO week in the date range
current = start_date
seen = set()
chart_data = []
while current <= end_date:
iso_year, iso_week, _ = current.isocalendar()
key = (iso_year, iso_week)
if key not in seen: # skip duplicate weeks in the same loop
seen.add(key)
year_suffix = str(iso_year)[2:]
label = f"{iso_week} ({year_suffix})"
count = counts_by_week.get(key, 0)
chart_data.append({"x": label, "y": count})
current += timedelta(days=7) # hop by weeks
return chart_data
def get_algolia_search_stats(client: AnalyticsClientSync, version: Version) -> dict:
default_args = {"index": version.stripped_boost_url_slug}
# search data
search_response = client.get_searches_count(**default_args).to_json()
search_data = json.loads(search_response)
# country data
country_results = client.get_top_countries(**default_args, limit=100).to_json()
country_data = json.loads(country_results)
country_stats = {r["country"]: r["count"] for r in country_data["countries"]}
try:
# country data
country_results = client.get_top_countries(**default_args, limit=100).to_json()
country_data = json.loads(country_results)
countries = country_data.get("countries") or []
country_stats = {r["country"]: r["count"] for r in countries}
except ValueError:
country_stats = {}
return {
"total_searches": search_data.get("count"),
"country_stats": country_stats,
@@ -221,3 +192,524 @@ def get_algolia_search_stats(client: AnalyticsClientSync, version: Version) -> d
:RELEASE_REPORT_SEARCH_TOP_COUNTRIES_LIMIT
],
}
def determine_versions(report_configuration_name: str) -> tuple[bool, Version, Version]:
    """Resolve the versions a report configuration refers to.

    Returns:
        (report_before_release, prior_version, version) — when the name does
        not match a live version the report targets "master" with the most
        recent release as the baseline.
    """
    version = Version.objects.filter(name=report_configuration_name).first()
    # Idiomatic boolean instead of `False if version else True`.
    report_before_release = version is None
    prior_version = None
    if report_before_release:
        # if the version is not set then the user has chosen a report
        # configuration that's not matching a live version, so we use the
        # most recent version
        version = Version.objects.filter(name="master").first()
        prior_version = Version.objects.most_recent()
    if not prior_version:
        prior_version = (
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .order_by("-version_array")
            .first()
        )
    return report_before_release, prior_version, version
def get_dependency_data(library_order, version):
    """Return per-library dependency diffs aligned with ``library_order``.

    Libraries without diff data get an empty dict placeholder.
    """
    try:
        dependency_diff_values = version.get_dependency_diffs().values()
    except BoostImportedDataException as e:
        logger.warning(f"Could not get dependency diffs for version {version}: {e}")
        dependency_diff_values = {}
    diffs_by_id = {entry["library_id"]: entry for entry in dependency_diff_values}
    return [diffs_by_id.get(library_id, {}) for library_id in library_order]
def global_new_contributors(version):
    """Return the set of CommitAuthor ids first appearing in ``version``."""
    earlier_version_ids = list(
        Version.objects.minor_versions()
        .filter(version_array__lt=version.cleaned_version_parts_int)
        .order_by("id")
        .values_list("id", flat=True)
    )
    authors_before = set(
        CommitAuthor.objects.filter(
            commit__library_version__version__in=earlier_version_ids
        )
        .distinct()
        .values_list("id", flat=True)
    )
    authors_through = set(
        CommitAuthor.objects.filter(
            commit__library_version__version__in=earlier_version_ids + [version.id]
        )
        .distinct()
        .values_list("id", flat=True)
    )
    return authors_through - authors_before
def get_library_queryset_by_version(version: Version, annotate_commit_count=False):
    """Return report libraries that have a LibraryVersion for ``version``.

    Returns an empty queryset when ``version`` is falsy; optionally annotates
    each library with its commit count for that version.
    """
    from libraries.forms import CreateReportFullForm

    libraries = CreateReportFullForm.library_queryset.none()
    if version:
        matching_lv = LibraryVersion.objects.filter(
            library=OuterRef("id"), version=version
        )[:1]
        libraries = CreateReportFullForm.library_queryset.filter(
            library_version=matching_lv,
        )
    if annotate_commit_count:
        libraries = libraries.annotate(commit_count=Count("library_version__commit"))
    return libraries
def get_top_libraries_for_version(version):
    """Libraries in ``version`` ordered by descending commit count."""
    annotated = get_library_queryset_by_version(version, annotate_commit_count=True)
    return annotated.order_by("-commit_count")
def get_libraries_by_name(version):
    """Libraries in ``version`` (with commit counts) in alphabetical order."""
    annotated = get_library_queryset_by_version(version, annotate_commit_count=True)
    return annotated.order_by("name")
def get_libraries_by_quality(version):
    """Return libraries grouped "great", "good", then "standard", in that order."""
    libraries = get_library_queryset_by_version(version)
    great = libraries.filter(graphic__isnull=False)
    good = libraries.filter(graphic__isnull=True, is_good=True)
    standard = libraries.filter(graphic__isnull=True, is_good=False)
    return list(chain(great, good, standard))
def get_library_version_counts(library_order, version):
    """Per-library commit counts for ``version``, ordered like ``library_order``.

    Returns a list of ``{"commit_count": ..., "id": ...}`` dicts.
    """
    library_qs = get_library_queryset_by_version(version, annotate_commit_count=True)
    # Precompute positions: list.index() inside a sort key is O(n) per call,
    # making the sort quadratic overall.
    position = {pk: idx for idx, pk in enumerate(library_order)}
    return sorted(
        library_qs.values("commit_count", "id"),
        key=lambda x: position[x["id"]],
    )
def get_library_full_counts(libraries, library_order):
    """All-time commit counts per library, ordered like ``library_order``.

    Returns a list of ``{"commit_count": ..., "id": ...}`` dicts.
    """
    # Precompute positions to avoid an O(n) list.index() call per comparison.
    position = {pk: idx for idx, pk in enumerate(library_order)}
    annotated = libraries.annotate(
        commit_count=Count("library_version__commit")
    ).values("commit_count", "id")
    return sorted(annotated, key=lambda x: position[x["id"]])
def get_top_contributors_for_library_version(library_order, version):
    """For each library id (in order), the top-10 commit authors for ``version``."""
    results = []
    for library_id in library_order:
        library_version = LibraryVersion.objects.get(
            version=version, library_id=library_id
        )
        top_authors = (
            CommitAuthor.objects.filter(commit__library_version=library_version)
            .annotate(commit_count=Count("commit"))
            .order_by("-commit_count")[:10]
        )
        results.append(top_authors)
    return results
def count_new_contributors(libraries, library_order, version):
    """Per-library counts of authors first appearing in ``version``,
    ordered like ``library_order``.

    Returns a list of ``{"id": library_id, "count": new_author_count}`` dicts.
    """
    # Ids of all minor versions strictly before this release.
    version_lt = list(
        Version.objects.minor_versions()
        .filter(version_array__lt=version.cleaned_version_parts_int)
        .values_list("id", flat=True)
    )
    version_lte = version_lt + [version.id]
    # Correlated subqueries: library versions of the outer library before /
    # through this release.
    lt_subquery = LibraryVersion.objects.filter(
        version__in=version_lt,
        library=OuterRef("id"),
    ).values("id")
    lte_subquery = LibraryVersion.objects.filter(
        version__in=version_lte,
        library=OuterRef("id"),
    ).values("id")
    # New contributors per library = distinct authors through the release
    # minus distinct authors before it.
    return sorted(
        list(
            libraries.annotate(
                authors_before_release_count=Count(
                    "library_version__commit__author",
                    filter=Q(library_version__in=lt_subquery),
                    distinct=True,
                ),
                authors_through_release_count=Count(
                    "library_version__commit__author",
                    filter=Q(library_version__in=lte_subquery),
                    distinct=True,
                ),
            )
            .annotate(
                count=F("authors_through_release_count")
                - F("authors_before_release_count")
            )
            .values("id", "count")
        ),
        key=lambda x: library_order.index(x["id"]),
    )
def count_issues(libraries, library_order, version, prior_version):
    """Opened/closed issue counts per library, ordered like ``library_order``.

    Libraries with no issue activity get zero counts.
    """
    counts_by_library = {
        row["library_id"]: row
        for row in Issue.objects.count_opened_closed_during_release(
            version, prior_version
        ).filter(library_id__in=[library.id for library in libraries])
    }
    return [
        counts_by_library.get(
            library_id, {"opened": 0, "closed": 0, "library_id": library_id}
        )
        for library_id in library_order
    ]
def get_library_versions(library_order, version):
    """LibraryVersion rows for ``version``, ordered like ``library_order``."""
    # Precompute positions: list.index() inside a sort key is O(n) per call.
    position = {pk: idx for idx, pk in enumerate(library_order)}
    return sorted(
        LibraryVersion.objects.filter(version=version, library_id__in=library_order),
        key=lambda lv: position[lv.library_id],
    )
def get_top_contributors_for_version(version):
    """Top-10 commit authors for ``version`` across the report libraries."""
    from libraries.forms import CreateReportFullForm

    report_libraries = CreateReportFullForm.library_queryset
    authors = CommitAuthor.objects.filter(commit__library_version__version=version)
    return authors.annotate(
        commit_count=Count(
            "commit",
            filter=Q(commit__library_version__library__in=report_libraries),
        )
    ).order_by("-commit_count")[:10]
def get_git_graph_data(prior_version: Version | None, version: Version):
    """Fetch commit count data for a release and return an instance of Graph.
    Returns data in a format to easily create a github style green box commit graph.

    Returns None when there is no prior version to define the date range.
    """
    if prior_version is None:
        return None

    @dataclass
    class Day:
        # One calendar day; color is filled in later by Graph.apply_colors().
        date: date
        count: int
        color: str = ""

    @dataclass
    class Week:
        days: list[Day] = field(default_factory=list)

        @cached_property
        def max(self):
            """The max number of commits this week."""
            return max(x.count for x in self.days)

    @dataclass
    class Graph:
        weeks: list[Week] = field(default_factory=list)
        # Light-to-dark green palette, one bucket per intensity level.
        colors: list[str] = field(
            default_factory=lambda: [
                "#E8F5E9",
                "#C8E6C9",
                "#A5D6A7",
                "#81C784",
                "#66BB6A",
                "#4CAF50",
                "#43A047",
                "#388E3C",
                "#2E7D32",
                "#1B5E20",
            ],
        )

        # NOTE: graph_start/graph_end close over the `start`/`end` names
        # assigned later in the enclosing function; as cached properties they
        # are only evaluated after those assignments exist.
        @cached_property
        def graph_start(self):
            return start.strftime("%B '%y")

        @cached_property
        def graph_end(self):
            return end.strftime("%B '%y")

        @cached_property
        def max(self):
            """The max number of commits in all weeks."""
            return max(x.max for x in self.weeks)

        def append_day(self, day: Day):
            """Append a day into the last week of self.weeks.
            - Automatically create a new week if there are already 7 days in the
            last week.
            """
            if len(self.weeks) == 0 or len(self.weeks[-1].days) == 7:
                self.weeks.append(Week())
            self.weeks[-1].days.append(day)

        def apply_colors(self):
            """Iterate through each day and apply a color.
            - The color is selected based on the number of commits made on
            that day, relative to the highest number of commits in all days in
            Graph.weeks.days.
            """
            if not (high := self.max):
                # No commits this release
                # TODO: we may want a more elegant solution
                # than just not graphing this library
                return
            for week in self.weeks:
                for day in week.days:
                    decimal = day.count / high
                    if decimal == 1:
                        day.color = self.colors[-1]
                    else:
                        idx = int(decimal * len(self.colors))
                        day.color = self.colors[idx]

    # Commit totals per calendar day for this version.
    count_query = (
        Commit.objects.filter(library_version__version=version)
        .values("committed_at__date")
        .annotate(count=Count("id"))
    )
    counts_by_date = {x["committed_at__date"]: x["count"] for x in count_query}
    graph = Graph()
    # The start date is the release date of the previous version
    # The end date is one day before the release date of the current version
    start: date = prior_version.release_date
    end: date = (version.release_date or date.today()) - timedelta(days=1)
    # if the release started on a Thursday, we want to add Sun -> Wed to the data
    # with empty counts, even if they aren't part of the release.
    for i in range(start.weekday(), 0, -1):
        day = Day(date=start - timedelta(days=i), count=0)
        graph.append_day(day)
    current_date = start
    while current_date <= end:
        day = Day(date=current_date, count=counts_by_date.get(current_date, 0))
        graph.append_day(day)
        current_date = current_date + timedelta(days=1)
    graph.apply_colors()
    return graph
def get_libraries(library_order: list[int]):
    """Fetch the given libraries preserving the order of ``library_order``."""
    ordering = Case(
        *[When(id=pk, then=Value(pos)) for pos, pk in enumerate(library_order)]
    )
    return Library.objects.filter(id__in=library_order).order_by(ordering)
def get_library_data(library_order: list[int], prior_version_id: int, version_id: int):
    """Assemble per-library report rows, keeping only libraries with commits
    in this release."""
    prior_version = Version.objects.get(pk=prior_version_id)
    version = Version.objects.get(pk=version_id)
    libraries = get_libraries(library_order)
    field_names = (
        "library",
        "full_count",
        "version_count",
        "top_contributors_release",
        "new_contributors_count",
        "issues",
        "library_version",
        "deps",
    )
    # Each helper returns a list aligned with library_order, so the columns
    # zip together row by row.
    rows = [
        dict(zip(field_names, values))
        for values in zip(
            libraries,
            get_library_full_counts(libraries, library_order),
            get_library_version_counts(library_order, version),
            get_top_contributors_for_library_version(library_order, version),
            count_new_contributors(libraries, library_order, version),
            count_issues(libraries, library_order, version, prior_version),
            get_library_versions(library_order, version),
            get_dependency_data(library_order, version),
        )
    ]
    return [row for row in rows if row["version_count"]["commit_count"] > 0]
def get_top_libraries():
    """The five report libraries with the most commits overall."""
    from libraries.forms import CreateReportFullForm

    ranked = CreateReportFullForm.library_queryset.annotate(
        commit_count=Count("library_version__commit")
    ).order_by("-commit_count")
    return ranked[:5]
def lines_changes_count(version: Version):
    """Return (lines added, lines removed) across report libraries for ``version``."""
    from libraries.forms import CreateReportFullForm

    # A single aggregate query computes both sums instead of two round trips
    # over the identical filter.
    totals = LibraryVersion.objects.filter(
        version=version,
        library__in=CreateReportFullForm.library_queryset,
    ).aggregate(
        lines_added=Sum("insertions"),
        lines_removed=Sum("deletions"),
    )
    return totals["lines_added"], totals["lines_removed"]
def get_commit_counts(version: Version):
    """Return (commits up to and including this version, commits in this
    version) across the report libraries.
    """
    from libraries.forms import CreateReportFullForm

    # NOTE(review): name__lte compares version names lexicographically;
    # confirm this matches the intended chronological cutoff — other code in
    # this module orders by version_array instead.
    commit_count = Commit.objects.filter(
        library_version__version__name__lte=version.name,
        library_version__library__in=CreateReportFullForm.library_queryset,
    ).count()
    version_commit_count = Commit.objects.filter(
        library_version__version=version,
        library_version__library__in=CreateReportFullForm.library_queryset,
    ).count()
    return commit_count, version_commit_count
def get_issues_counts(prior_version: Version, version: Version):
    """Return (opened, closed) issue counts for the release window."""
    from libraries.forms import CreateReportFullForm

    report_issues = Issue.objects.filter(
        library__in=CreateReportFullForm.library_queryset
    )
    opened = report_issues.opened_during_release(version, prior_version).count()
    closed = report_issues.closed_during_release(version, prior_version).count()
    return opened, closed
def get_download_links(version: Version):
return {
k: list(v)
for k, v in groupby(
version.downloads.all().order_by("operating_system"),
key=attrgetter("operating_system"),
)
}
def get_mailinglist_msg_counts(version: Version) -> tuple[int, int]:
    """Return (total mailing-list message count, top-10 EmailData rows by count)."""
    version_emails = EmailData.objects.filter(version=version)
    total_mailinglist_count = version_emails.aggregate(total=Sum("count"))["total"]
    top_ten = version_emails.with_total_counts().order_by("-total_count")[:10]
    return total_mailinglist_count, top_ten
def get_slack_channels():
    """Yield "boost*"-named slack channels in name order, in groups of 10."""
    boost_channels = Channel.objects.filter(name__istartswith="boost").order_by("name")
    return batched(boost_channels, 10)
def get_libraries_for_index(library_data, version: Version):
    """Return (library, has_report_entry) pairs for the report index.

    The second element flags whether the library has a row in
    ``library_data`` (i.e. had commit activity this release).
    """
    # Hoist the membership list out of the loop — the original rebuilt it for
    # every library, making the loop quadratic.
    reported_libraries = [entry["library"] for entry in library_data]
    return [
        (library, library in reported_libraries)
        for library in get_libraries_by_quality(version)
    ]
def get_slack_stats_for_channels(
    prior_version, version, channels: list[Channel] | None = None
):
    """Get slack stats for specific channels, or all channels.

    Returns a dict with the top-10 users, distinct user count, total message
    count, and the number of users first active in this release window.
    """
    # Window: from the prior release date to the day before this release
    # (or today for an unreleased version).
    start = prior_version.release_date
    end = date.today()
    if version.release_date:
        end = version.release_date - timedelta(days=1)
    # count of all messages in the date range
    q = Q(day__range=[start, end])
    if channels:
        q &= Q(channel__in=channels)
    total = SlackActivityBucket.objects.filter(q).aggregate(total=Sum("count"))["total"]
    # message counts per user in the date range
    q = Q(slackactivitybucket__day__range=[start, end])
    if channels:
        q &= Q(slackactivitybucket__channel__in=channels)
    per_user = (
        SlackUser.objects.annotate(
            total=Sum(
                "slackactivitybucket__count",
                filter=q,
            )
        )
        .filter(total__gt=0)
        .order_by("-total")
    )
    q = Q()
    if channels:
        q &= Q(channel__in=channels)
    # New users = users active through the window minus users already active
    # before it.
    # NOTE(review): the day filters are applied after .distinct("user_id");
    # confirm the generated DISTINCT ON interacts with the filters as intended.
    distinct_users = (
        SlackActivityBucket.objects.filter(q).order_by("user_id").distinct("user_id")
    )
    new_user_count = (
        distinct_users.filter(day__lte=end).count()
        - distinct_users.filter(day__lt=start).count()
    )
    return {
        "users": per_user[:10],
        "user_count": per_user.count(),
        "total": total,
        "new_user_count": new_user_count,
    }
def get_slack_stats(prior_version: Version, version: Version):
    """Returns all slack related stats.
    Only returns channels with activity.
    """
    active_channel_stats = []
    for channel in Channel.objects.filter(name__istartswith="boost"):
        channel_stat = get_slack_stats_for_channels(
            prior_version, version, channels=[channel]
        )
        channel_stat["channel"] = channel
        if channel_stat["user_count"] > 0:
            active_channel_stats.append(channel_stat)
    # Most active channels first; a None total sorts as zero.
    active_channel_stats.sort(key=lambda stat: -(stat["total"] or 0))
    # we want 2 channels per pdf page, use batched to get groups of 2
    return batched(active_channel_stats, 2)

View File

@@ -7,7 +7,6 @@
{{ block.super }}
<li><a href="{% url 'admin:update_libraries' %}" class="addlink">{% trans "Update Library Data" %}</a></li>
<li><a href="{% url 'admin:update_authors_and_maintainers' %}" class="addlink">{% trans "Update Authors & Maintainers" %}</a></li>
<li><a href="{% url 'admin:release_report' %}" class="addlink">{% trans "Get Release Report" %}</a></li>
<li><a href="{% url 'admin:library_report_full' %}" class="addlink">{% trans "Get Library Report" %}</a></li>
{% endblock %}
</ul>

View File

@@ -0,0 +1,16 @@
{% extends "admin/change_form.html" %}
{% block content %}
<div class="warning">
Publishing:
<ul>
<li>Report PDFs are not archived or versioned. Publishing an older version
is not possible after a newer one has been published.</li>
<li>For any Report Configuration it's only possible to republish the most
recently published report, or a new draft.</li>
<li>To publish a draft, for safety, all existing entries for that Report
Configuration must be manually deleted first.</li>
</ul>
</div>
{{ block.super }}
{% endblock %}

View File

@@ -0,0 +1,17 @@
{% extends "admin/change_list.html" %}
{% load i18n admin_urls %}
{% block object-tools %}
<ul class="object-tools">
{% block object-tools-items %}
{{ block.super }}
<li>
<a
title='{% trans "Run all required tasks after a new release. Expect this to take up to a few hours." %}'
href="{% url 'admin:release_tasks' %}" class="addlink">{% trans "Get Release Report Data" %}
</a>
</li>
<li><a href="{% url 'admin:release_report' %}" class="addlink">{% trans "Create Release Report" %}</a></li>
{% endblock %}
</ul>
{% endblock %}

View File

@@ -9,7 +9,7 @@
document.addEventListener("DOMContentLoaded", function() {
setTimeout(function () {
window.location.reload()
}, 10000);
}, 2000);
})
</script>
{% endblock content %}

View File

@@ -6,12 +6,6 @@
{% block object-tools-items %}
{{ block.super }}
<li><a href="{% url 'admin:import_new_releases' %}" class="addlink">{% trans "Import New Releases" %}</a></li>
<li>
<a
title='{% trans "Run all required tasks after a new release. Expect this to take up to a few hours." %}'
href="{% url 'admin:release_tasks' %}" class="addlink">{% trans "Do it all" %}
</a>
</li>
{% endblock %}
</ul>
{% endblock %}

View File

@@ -51,6 +51,15 @@
<span class="block text-xs">{{ version.github_url|cut:"https://" }}</span>
</a>
</div>
{% if release_report_url %}
<div class="-ml-2 h-14">
<a class="block items-center py-1 px-2 rounded cursor-pointer hover:bg-gray-100 dark:hover:bg-slate text-sky-600 dark:text-sky-300 hover:text-orange dark:hover:text-orange"
href="{{ release_report_url }}">
<span class="dark:text-white text-slate">Release Report</span>
<span class="block text-xs">{{ release_report_file_name }}</span>
</a>
</div>
{% endif %}
</div>
{% if not version.beta %}
{% if deps.added or deps.removed %}

View File

@@ -4,7 +4,7 @@ from django.db.models.query import QuerySet
from django.http import HttpRequest, HttpResponseRedirect
from django.urls import path
from libraries.tasks import release_tasks, import_new_versions_tasks
from libraries.tasks import import_new_versions_tasks
from . import models
@@ -35,26 +35,9 @@ class VersionAdmin(admin.ModelAdmin):
self.admin_site.admin_view(self.import_new_releases),
name="import_new_releases",
),
path(
"release_tasks/",
self.admin_site.admin_view(self.release_tasks),
name="release_tasks",
),
]
return my_urls + urls
def release_tasks(self, request):
release_tasks.delay(
base_uri=f"https://{request.get_host()}",
user_id=request.user.id,
generate_report=True,
)
self.message_user(
request,
"release_tasks has started, you will receive an email when the task finishes.", # noqa: E501
)
return HttpResponseRedirect("../")
def import_new_releases(self, request):
import_new_versions_tasks.delay(user_id=request.user.id)
msg = "New releases are being imported. You will receive an email when the task finishes." # noqa: E501

View File

@@ -17,7 +17,7 @@ from django.views.decorators.csrf import csrf_exempt
from core.models import RenderedContent
from libraries.constants import LATEST_RELEASE_URL_PATH_STR
from libraries.mixins import VersionAlertMixin, BoostVersionMixin
from libraries.models import Commit, CommitAuthor
from libraries.models import Commit, CommitAuthor, ReleaseReport
from libraries.tasks import generate_release_report
from libraries.utils import (
set_selected_boost_version,
@@ -70,6 +70,10 @@ class VersionDetail(BoostVersionMixin, VersionAlertMixin, DetailView):
context["top_contributors_release"] = self.get_top_contributors_release(obj)
context["documentation_url"] = obj.documentation_url
report_file_info = self.get_release_report_info()
if report_file_info:
context["release_report_file_name"] = report_file_info["file_name"]
context["release_report_url"] = report_file_info["file_path"]
try:
context["deps"] = self.get_library_version_dependencies(obj)
except BoostImportedDataException:
@@ -83,6 +87,18 @@ class VersionDetail(BoostVersionMixin, VersionAlertMixin, DetailView):
context["version_alert"] = False
return context
def get_release_report_info(self) -> dict | None:
try:
if report := ReleaseReport.objects.get(
report_configuration__version=self.object.name, published=True
):
return {
"file_name": report.file.name.replace(ReleaseReport.upload_dir, ""),
"file_path": report.get_media_file(),
}
except ReleaseReport.DoesNotExist:
return {}
def get_library_version_dependencies(self, version: Version):
diffs = version.get_dependency_diffs()
added = [len(x["added"]) for x in diffs.values() if x["added"]]
@@ -224,7 +240,7 @@ class ReportPreviewGenerateView(BoostVersionMixin, View):
generate_release_report.delay(
user_id=request.user.id,
params={"version": version.id},
base_uri=f"https://{request.get_host()}",
base_uri=f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{request.get_host()}",
)
messages.success(request, "Report generation queued.")
return redirect("release-report-preview", version_slug=version_name)