mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
569 lines
23 KiB
Python
569 lines
23 KiB
Python
from structlog import get_logger
|
|
from datetime import date
|
|
|
|
from django import forms
|
|
from django.template.loader import render_to_string
|
|
from django.db.models import Q, Count, Sum
|
|
from django.forms import Form, ModelChoiceField, ModelForm, BooleanField
|
|
|
|
from core.models import RenderedContent
|
|
from reports.generation import (
|
|
get_git_graph_data,
|
|
get_library_data,
|
|
get_library_full_counts,
|
|
get_libraries_by_name,
|
|
get_top_contributors_for_version,
|
|
get_top_libraries,
|
|
get_top_libraries_for_version,
|
|
lines_changes_count,
|
|
get_commit_counts,
|
|
get_issues_counts,
|
|
get_download_links,
|
|
determine_versions,
|
|
get_libraries,
|
|
get_libraries_for_index,
|
|
get_mailinglist_counts,
|
|
get_slack_channels,
|
|
get_slack_stats,
|
|
)
|
|
from versions.models import Version, ReportConfiguration
|
|
from .models import (
|
|
Commit,
|
|
CommitAuthor,
|
|
Library,
|
|
CommitAuthorEmail,
|
|
)
|
|
from libraries.constants import SUB_LIBRARIES, RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD
|
|
from mailing_list.models import EmailData
|
|
from .tasks import (
|
|
count_mailinglist_contributors,
|
|
generate_search_cloud,
|
|
generate_mailinglist_cloud,
|
|
get_mailing_list_stats,
|
|
get_new_subscribers_stats,
|
|
count_commit_contributors_totals,
|
|
get_new_contributors_count,
|
|
)
|
|
from .utils import conditional_batched
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
class LibraryForm(ModelForm):
|
|
class Meta:
|
|
model = Library
|
|
fields = ["categories"]
|
|
|
|
|
|
class CreateReportFullForm(Form):
|
|
"""Form for creating a report over all releases."""
|
|
|
|
html_template_name = "admin/library_report_full_detail.html"
|
|
|
|
library_queryset = Library.objects.exclude(key__in=SUB_LIBRARIES).order_by("name")
|
|
library_1 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
help_text="If none are selected, the top 5 will be auto-selected.",
|
|
)
|
|
library_2 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_3 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_4 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_5 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_6 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_7 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
library_8 = ModelChoiceField(
|
|
queryset=library_queryset,
|
|
required=False,
|
|
)
|
|
no_cache = BooleanField(
|
|
required=False,
|
|
initial=False,
|
|
help_text="Force the page to be regenerated, do not use cache.",
|
|
)
|
|
publish = BooleanField(
|
|
required=False,
|
|
initial=False,
|
|
help_text="Warning: overwrites existing published report, not reversible.",
|
|
)
|
|
|
|
@property
|
|
def cache_key(self):
|
|
chosen_libraries = [
|
|
self.cleaned_data["library_1"],
|
|
self.cleaned_data["library_2"],
|
|
self.cleaned_data["library_3"],
|
|
self.cleaned_data["library_4"],
|
|
self.cleaned_data["library_5"],
|
|
self.cleaned_data["library_6"],
|
|
self.cleaned_data["library_7"],
|
|
self.cleaned_data["library_8"],
|
|
]
|
|
lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries)
|
|
return f"full-report-{lib_string}"
|
|
|
|
def _get_library_order(self, top_libraries) -> list[int]:
|
|
library_order = [
|
|
x.id
|
|
for x in [
|
|
self.cleaned_data["library_1"],
|
|
self.cleaned_data["library_2"],
|
|
self.cleaned_data["library_3"],
|
|
self.cleaned_data["library_4"],
|
|
self.cleaned_data["library_5"],
|
|
self.cleaned_data["library_6"],
|
|
self.cleaned_data["library_7"],
|
|
self.cleaned_data["library_8"],
|
|
]
|
|
if x is not None
|
|
]
|
|
if not library_order:
|
|
library_order = [x.id for x in top_libraries]
|
|
return library_order
|
|
|
|
def _get_top_contributors_overall(self):
|
|
return (
|
|
CommitAuthor.objects.all()
|
|
.annotate(
|
|
commit_count=Count(
|
|
"commit",
|
|
filter=Q(
|
|
commit__library_version__library__in=self.library_queryset
|
|
),
|
|
)
|
|
)
|
|
.values("name", "avatar_url", "commit_count", "github_profile_url")
|
|
.order_by("-commit_count")[:10]
|
|
)
|
|
|
|
def _get_top_contributors_for_library(self, library_order):
|
|
top_contributors_library = []
|
|
for library_id in library_order:
|
|
top_contributors_library.append(
|
|
CommitAuthor.objects.filter(
|
|
commit__library_version__library_id=library_id
|
|
)
|
|
.annotate(commit_count=Count("commit"))
|
|
.order_by("-commit_count")[:10]
|
|
)
|
|
return top_contributors_library
|
|
|
|
def get_stats(self):
|
|
commit_count = Commit.objects.filter(
|
|
library_version__library__in=self.library_queryset
|
|
).count()
|
|
|
|
top_libraries = get_top_libraries()
|
|
library_order = self._get_library_order(top_libraries)
|
|
libraries = Library.objects.filter(id__in=library_order)
|
|
library_data = [
|
|
{
|
|
"library": x[0],
|
|
"full_count": x[1],
|
|
"top_contributors": x[2],
|
|
}
|
|
for x in zip(
|
|
sorted(list(libraries), key=lambda x: library_order.index(x.id)),
|
|
get_library_full_counts(libraries, library_order),
|
|
self._get_top_contributors_for_library(library_order),
|
|
)
|
|
]
|
|
top_contributors = self._get_top_contributors_overall()
|
|
mailinglist_total = EmailData.objects.all().aggregate(total=Sum("count"))[
|
|
"total"
|
|
]
|
|
first_version = Version.objects.order_by("release_date").first()
|
|
return {
|
|
"mailinglist_counts": EmailData.objects.with_total_counts().order_by(
|
|
"-total_count"
|
|
)[:10],
|
|
"mailinglist_total": mailinglist_total,
|
|
"first_version": first_version,
|
|
"commit_count": commit_count,
|
|
"top_contributors": top_contributors,
|
|
"library_data": library_data,
|
|
"top_libraries": top_libraries,
|
|
"library_count": self.library_queryset.count(),
|
|
}
|
|
|
|
def cache_html(self, base_uri=None):
|
|
"""Render and cache the html for this report."""
|
|
# ensure we have "cleaned_data"
|
|
if not self.is_valid():
|
|
return ""
|
|
try:
|
|
context = self.get_stats()
|
|
if base_uri:
|
|
context["base_uri"] = base_uri
|
|
html = render_to_string(self.html_template_name, context)
|
|
except FileNotFoundError as e:
|
|
html = (
|
|
f"An error occurred generating the report: {e}. To see the image "
|
|
f"which is broken copy the error and run the report again. This "
|
|
f"error isn't shown on subsequent runs."
|
|
)
|
|
self.cache_set(html)
|
|
return html
|
|
|
|
def cache_get(self) -> RenderedContent | None:
|
|
return RenderedContent.objects.filter(cache_key=self.cache_key).first()
|
|
|
|
def cache_clear(self):
|
|
return RenderedContent.objects.filter(cache_key=self.cache_key).delete()
|
|
|
|
def cache_set(self, content_html):
|
|
"""Cache the html for this report."""
|
|
return RenderedContent.objects.update_or_create(
|
|
cache_key=self.cache_key,
|
|
defaults={
|
|
"content_html": content_html,
|
|
"content_type": "text/html",
|
|
},
|
|
)
|
|
|
|
|
|
class CreateReportForm(CreateReportFullForm):
|
|
"""Form for creating a report for a specific release."""
|
|
|
|
html_template_name = "admin/release_report_detail.html"
|
|
# queryset will be set in __init__
|
|
report_configuration = ModelChoiceField(queryset=ReportConfiguration.objects.none())
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super().__init__(*args, **kwargs)
|
|
# we want to allow master, develop, the latest release, the latest beta, along
|
|
# with any report configuration matching no Version, exclude all others.
|
|
exclusion_versions = []
|
|
if betas := Version.objects.filter(beta=True).order_by("-release_date")[1:]:
|
|
exclusion_versions += betas
|
|
if older_releases := Version.objects.filter(
|
|
active=True, full_release=True
|
|
).order_by("-release_date")[1:]:
|
|
exclusion_versions += older_releases
|
|
qs = ReportConfiguration.objects.exclude(
|
|
version__in=[v.name for v in exclusion_versions]
|
|
).order_by("-version")
|
|
|
|
self.fields["report_configuration"].queryset = qs
|
|
self.fields["library_1"].help_text = (
|
|
"If none are selected, all libraries will be selected."
|
|
)
|
|
|
|
@property
|
|
def cache_key(self):
|
|
chosen_libraries = [
|
|
self.cleaned_data["library_1"],
|
|
self.cleaned_data["library_2"],
|
|
self.cleaned_data["library_3"],
|
|
self.cleaned_data["library_4"],
|
|
self.cleaned_data["library_5"],
|
|
self.cleaned_data["library_6"],
|
|
self.cleaned_data["library_7"],
|
|
self.cleaned_data["library_8"],
|
|
]
|
|
lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries)
|
|
report_configuration = self.cleaned_data["report_configuration"]
|
|
return f"release-report-{lib_string}-{report_configuration.version}"
|
|
|
|
def get_stats(self):
|
|
report_configuration = self.cleaned_data["report_configuration"]
|
|
committee_members = report_configuration.financial_committee_members.all()
|
|
# NOTE TO FUTURE DEVS: remember to account for the fact that a report
|
|
# configuration may not match with a real version in frequent cases where
|
|
# reports are generated before the release version has been created.
|
|
(report_before_release, prior_version, version) = determine_versions(
|
|
report_configuration.version
|
|
)
|
|
|
|
# trigger tasks first to run in parallel
|
|
mailing_list_contributors_task = count_mailinglist_contributors.delay(
|
|
prior_version.pk, version.pk
|
|
)
|
|
mailing_list_stats_task = get_mailing_list_stats.delay(
|
|
prior_version.pk, version.pk
|
|
)
|
|
commit_contributors_task = count_commit_contributors_totals.delay(
|
|
version.pk, prior_version.pk
|
|
)
|
|
new_subscribers_stats_task = get_new_subscribers_stats.delay(
|
|
prior_version.release_date, version.release_date or date.today()
|
|
)
|
|
mailinglist_wordcloud_task = generate_mailinglist_cloud.delay(
|
|
prior_version.pk, version.pk
|
|
)
|
|
# if the report is based on a live version, look for stats for that
|
|
# version, otherwise use the stats for the prior (live) version
|
|
search_wordcloud_task = generate_search_cloud.delay(
|
|
prior_version.pk if report_before_release else version.pk
|
|
)
|
|
new_contributors_count_task = get_new_contributors_count.delay(version.pk)
|
|
# end of task triggering
|
|
|
|
commit_count, version_commit_count = get_commit_counts(version)
|
|
top_libraries_for_version = get_top_libraries_for_version(version)
|
|
top_libraries_by_name = get_libraries_by_name(version)
|
|
library_order = self._get_library_order(top_libraries_by_name)
|
|
# TODO: we may in future need to find a way to show the removed libraries, for
|
|
# now it's not needed. In that case the distinction between running this on a
|
|
# ReportConfiguration with a real 'version' entry vs one that instead uses 'master'
|
|
# will need to be considered
|
|
libraries = get_libraries(library_order)
|
|
new_libraries = libraries.exclude(
|
|
library_version__version__release_date__lte=prior_version.release_date
|
|
).prefetch_related("authors")
|
|
top_contributors = get_top_contributors_for_version(version)
|
|
mailinglist_counts = get_mailinglist_counts(version)
|
|
lines_added, lines_removed = lines_changes_count(version)
|
|
opened_issues_count, closed_issues_count = get_issues_counts(
|
|
prior_version, version
|
|
)
|
|
# TODO: connected to above todo, add removed_libraries.count()
|
|
removed_library_count = 0
|
|
|
|
library_data = get_library_data(library_order, prior_version.pk, version.pk)
|
|
slack_stats = get_slack_stats(prior_version, version)
|
|
|
|
library_index_library_data = get_libraries_for_index(
|
|
library_data, version, prior_version
|
|
)
|
|
batched_library_data = conditional_batched(
|
|
library_data,
|
|
2,
|
|
lambda x: x.get("top_contributors_release").count()
|
|
<= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD,
|
|
)
|
|
git_graph_data = get_git_graph_data(prior_version, version)
|
|
download = get_download_links(version)
|
|
### completed task handling ###
|
|
(mailinglist_contributor_release_count, mailinglist_contributor_new_count) = (
|
|
mailing_list_contributors_task.get()
|
|
)
|
|
(mailinglist_post_stats, total_mailinglist_count) = (
|
|
mailing_list_stats_task.get()
|
|
)
|
|
(commit_contributors_release_count, commit_contributors_new_count) = (
|
|
commit_contributors_task.get()
|
|
)
|
|
(
|
|
mailinglist_words,
|
|
mailinglist_wordcloud_base64,
|
|
mailinglist_wordcloud_top_words,
|
|
) = mailinglist_wordcloud_task.get()
|
|
(search_wordcloud_base64, search_wordcloud_top_words, search_stats) = (
|
|
search_wordcloud_task.get()
|
|
)
|
|
global_contributors_new_count = new_contributors_count_task.get()
|
|
|
|
return {
|
|
"committee_members": committee_members,
|
|
"lines_added": lines_added,
|
|
"lines_removed": lines_removed,
|
|
"version": version,
|
|
"report_configuration": report_configuration,
|
|
"prior_version": prior_version,
|
|
"opened_issues_count": opened_issues_count,
|
|
"closed_issues_count": closed_issues_count,
|
|
"mailinglist_wordcloud_base64": mailinglist_wordcloud_base64,
|
|
"mailinglist_wordcloud_frequencies": mailinglist_wordcloud_top_words,
|
|
"mailinglist_counts": mailinglist_counts,
|
|
"mailinglist_total": total_mailinglist_count or 0,
|
|
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
|
|
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
|
|
"mailinglist_post_stats": mailinglist_post_stats,
|
|
"mailinglist_new_subscribers_stats": new_subscribers_stats_task.get(),
|
|
"mailinglist_charts_start_year": prior_version.release_date.year,
|
|
"search_wordcloud_base64": search_wordcloud_base64,
|
|
"search_wordcloud_frequencies": search_wordcloud_top_words,
|
|
"search_stats": search_stats,
|
|
"commit_contributors_release_count": commit_contributors_release_count,
|
|
"commit_contributors_new_count": commit_contributors_new_count,
|
|
"global_contributors_new_count": global_contributors_new_count,
|
|
"commit_count": commit_count,
|
|
"version_commit_count": version_commit_count,
|
|
"top_contributors_release_overall": top_contributors,
|
|
"library_data": library_data,
|
|
"new_libraries": new_libraries,
|
|
"batched_library_data": batched_library_data,
|
|
"top_libraries_for_version": top_libraries_for_version,
|
|
"library_count": libraries.count(),
|
|
"library_index_libraries": library_index_library_data,
|
|
"added_library_count": new_libraries.count(),
|
|
"removed_library_count": removed_library_count,
|
|
"downloads": download,
|
|
"contribution_box_graph": git_graph_data,
|
|
"slack_channels": get_slack_channels(),
|
|
"slack": slack_stats,
|
|
}
|
|
|
|
def generate_context(
|
|
self, report_configuration: ReportConfiguration, stats_results: dict
|
|
):
|
|
committee_members = report_configuration.financial_committee_members.all()
|
|
|
|
# NOTE TO FUTURE DEVS: remember to account for the fact that a report
|
|
# configuration may not match with a real version in frequent cases where
|
|
# reports are generated before the release version has been created.
|
|
(report_before_release, prior_version, version) = determine_versions(
|
|
report_configuration.version
|
|
)
|
|
|
|
# Unpack stats_results in the same order as tasks were defined in the workflow
|
|
(
|
|
(mailinglist_contributor_release_count, mailinglist_contributor_new_count),
|
|
(mailinglist_post_stats, total_mailinglist_count),
|
|
(commit_contributors_release_count, commit_contributors_new_count),
|
|
mailinglist_new_subscribers_stats,
|
|
(
|
|
mailinglist_words,
|
|
mailinglist_wordcloud_base64,
|
|
mailinglist_wordcloud_top_words,
|
|
),
|
|
(search_wordcloud_base64, search_wordcloud_top_words, search_stats),
|
|
global_contributors_new_count,
|
|
) = stats_results
|
|
|
|
# Compute the synchronous stats that don't require async tasks
|
|
commit_count, version_commit_count = get_commit_counts(version)
|
|
top_libraries_for_version = get_top_libraries_for_version(version)
|
|
top_libraries_by_name = get_libraries_by_name(version)
|
|
library_order = self._get_library_order(top_libraries_by_name)
|
|
# TODO: we may in future need to find a way to show the removed libraries, for
|
|
# now it's not needed. In that case the distinction between running this on a
|
|
# ReportConfiguration with a real 'version' entry vs one that instead uses 'master'
|
|
# will need to be considered
|
|
libraries = get_libraries(library_order)
|
|
new_libraries = libraries.exclude(
|
|
library_version__version__release_date__lte=prior_version.release_date
|
|
).prefetch_related("authors")
|
|
top_contributors = get_top_contributors_for_version(version)
|
|
mailinglist_counts = get_mailinglist_counts(version)
|
|
lines_added, lines_removed = lines_changes_count(version)
|
|
opened_issues_count, closed_issues_count = get_issues_counts(
|
|
prior_version, version
|
|
)
|
|
# TODO: connected to above todo, add removed_libraries.count()
|
|
removed_library_count = 0
|
|
|
|
library_data = get_library_data(library_order, prior_version.pk, version.pk)
|
|
slack_stats = get_slack_stats(prior_version, version)
|
|
|
|
library_index_library_data = get_libraries_for_index(
|
|
library_data, version, prior_version
|
|
)
|
|
batched_library_data = conditional_batched(
|
|
library_data,
|
|
2,
|
|
lambda x: x.get("top_contributors_release").count()
|
|
<= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD,
|
|
)
|
|
git_graph_data = get_git_graph_data(prior_version, version)
|
|
download = get_download_links(version)
|
|
|
|
return {
|
|
"committee_members": committee_members,
|
|
"lines_added": lines_added,
|
|
"lines_removed": lines_removed,
|
|
"version": version,
|
|
"report_configuration": report_configuration,
|
|
"prior_version": prior_version,
|
|
"opened_issues_count": opened_issues_count,
|
|
"closed_issues_count": closed_issues_count,
|
|
"mailinglist_wordcloud_base64": mailinglist_wordcloud_base64,
|
|
"mailinglist_wordcloud_frequencies": mailinglist_wordcloud_top_words,
|
|
"mailinglist_counts": mailinglist_counts,
|
|
"mailinglist_total": total_mailinglist_count or 0,
|
|
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
|
|
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
|
|
"mailinglist_post_stats": mailinglist_post_stats,
|
|
"mailinglist_new_subscribers_stats": mailinglist_new_subscribers_stats,
|
|
"mailinglist_charts_start_year": prior_version.release_date.year,
|
|
"search_wordcloud_base64": search_wordcloud_base64,
|
|
"search_wordcloud_frequencies": search_wordcloud_top_words,
|
|
"search_stats": search_stats,
|
|
"commit_contributors_release_count": commit_contributors_release_count,
|
|
"commit_contributors_new_count": commit_contributors_new_count,
|
|
"global_contributors_new_count": global_contributors_new_count,
|
|
"commit_count": commit_count,
|
|
"version_commit_count": version_commit_count,
|
|
"top_contributors_release_overall": top_contributors,
|
|
"library_data": library_data,
|
|
"new_libraries": new_libraries,
|
|
"batched_library_data": batched_library_data,
|
|
"top_libraries_for_version": top_libraries_for_version,
|
|
"library_count": libraries.count(),
|
|
"library_index_libraries": library_index_library_data,
|
|
"added_library_count": new_libraries.count(),
|
|
"removed_library_count": removed_library_count,
|
|
"downloads": download,
|
|
"contribution_box_graph": git_graph_data,
|
|
"slack_channels": get_slack_channels(),
|
|
"slack": slack_stats,
|
|
}
|
|
|
|
def render_with_stats(self, stats_results, base_uri=None):
|
|
"""Render HTML with pre-computed stats results"""
|
|
context = self.generate_context(
|
|
self.cleaned_data["report_configuration"], stats_results
|
|
)
|
|
if base_uri:
|
|
context["base_uri"] = base_uri
|
|
html = render_to_string(self.html_template_name, context)
|
|
self.cache_set(html)
|
|
return html
|
|
|
|
|
|
class CommitAuthorEmailForm(Form):
|
|
"""
|
|
This model is used to allow developers to claim a commit author
|
|
by email address.
|
|
"""
|
|
|
|
email = forms.EmailField()
|
|
|
|
class Meta:
|
|
fields = ["email"]
|
|
|
|
def clean_email(self):
|
|
"""Emails should have been created by the commit import process, so we need to
|
|
ensure the email exists."""
|
|
email = self.cleaned_data.get("email")
|
|
commit_author_email = CommitAuthorEmail.objects.filter(
|
|
email__iexact=email
|
|
).first()
|
|
msg = None
|
|
|
|
if not commit_author_email:
|
|
msg = "Email address is not associated with any commits."
|
|
elif commit_author_email.author.user is not None:
|
|
msg = (
|
|
"This email address is already associated with a user. Report an "
|
|
"issue if this is incorrect."
|
|
)
|
|
elif commit_author_email.claim_verified:
|
|
msg = (
|
|
"This email address has already been claimed and verified. Report an"
|
|
" issue if this is incorrect."
|
|
)
|
|
if msg:
|
|
raise forms.ValidationError(msg)
|
|
|
|
return email
|