import io import base64 from functools import cached_property from itertools import groupby from operator import attrgetter from dataclasses import dataclass, field from datetime import date, timedelta import psycopg2 from wordcloud import WordCloud, STOPWORDS from matplotlib import pyplot as plt from django.template.loader import render_to_string from django.db.models import F, Q, Count, OuterRef, Sum from django.forms import Form, ModelChoiceField, ModelForm, BooleanField from django.conf import settings from core.models import RenderedContent, SiteSettings from versions.models import Version from .models import Commit, CommitAuthor, Issue, Library, LibraryVersion from libraries.constants import SUB_LIBRARIES from mailing_list.models import EmailData class LibraryForm(ModelForm): class Meta: model = Library fields = ["categories"] class VersionSelectionForm(Form): queryset = Version.objects.active().defer("data") queryset = queryset.exclude(name__in=["develop", "master", "head"]) version = ModelChoiceField( queryset=queryset, label="Select a version", empty_label="Choose a version...", ) class CreateReportFullForm(Form): """Form for creating a report over all releases.""" html_template_name = "admin/library_report_full_detail.html" library_queryset = Library.objects.exclude(key__in=SUB_LIBRARIES).order_by("name") library_1 = ModelChoiceField( queryset=library_queryset, required=False, help_text="If none are selected, the top 5 will be auto-selected.", ) library_2 = ModelChoiceField( queryset=library_queryset, required=False, ) library_3 = ModelChoiceField( queryset=library_queryset, required=False, ) library_4 = ModelChoiceField( queryset=library_queryset, required=False, ) library_5 = ModelChoiceField( queryset=library_queryset, required=False, ) library_6 = ModelChoiceField( queryset=library_queryset, required=False, ) library_7 = ModelChoiceField( queryset=library_queryset, required=False, ) library_8 = ModelChoiceField( queryset=library_queryset, required=False, ) no_cache = BooleanField( required=False, initial=False, help_text="Force the page to be regenerated, do not use cache.", ) @property def cache_key(self): chosen_libraries = [ self.cleaned_data["library_1"], self.cleaned_data["library_2"], self.cleaned_data["library_3"], self.cleaned_data["library_4"], self.cleaned_data["library_5"], self.cleaned_data["library_6"], self.cleaned_data["library_7"], self.cleaned_data["library_8"], ] lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries) return f"full-report-{lib_string}" def _get_top_libraries(self): return self.library_queryset.annotate( commit_count=Count("library_version__commit") ).order_by("-commit_count")[:5] def _get_library_order(self, top_libraries): library_order = [ x.id for x in [ self.cleaned_data["library_1"], self.cleaned_data["library_2"], self.cleaned_data["library_3"], self.cleaned_data["library_4"], self.cleaned_data["library_5"], self.cleaned_data["library_6"], self.cleaned_data["library_7"], self.cleaned_data["library_8"], ] if x is not None ] if not library_order: library_order = [x.id for x in top_libraries] return library_order def _get_library_full_counts(self, libraries, library_order): return sorted( list( libraries.annotate( commit_count=Count("library_version__commit") ).values("commit_count", "id") ), key=lambda x: library_order.index(x["id"]), ) def _get_top_contributors_overall(self): return ( CommitAuthor.objects.all() .annotate( commit_count=Count( "commit", filter=Q( commit__library_version__library__in=self.library_queryset ), ) ) .values("name", "avatar_url", "commit_count", "github_profile_url") .order_by("-commit_count")[:10] ) def _get_top_contributors_for_library(self, library_order): top_contributors_library = [] for library_id in library_order: top_contributors_library.append( CommitAuthor.objects.filter( commit__library_version__library_id=library_id ) .annotate(commit_count=Count("commit")) .values( "name", "avatar_url", "github_profile_url", "commit_count", "commit__library_version__library_id", ) .order_by("-commit_count")[:10] ) return top_contributors_library def get_stats(self): commit_count = Commit.objects.filter( library_version__library__in=self.library_queryset ).count() top_libraries = self._get_top_libraries() library_order = self._get_library_order(top_libraries) libraries = Library.objects.filter(id__in=library_order) library_data = [ { "library": x[0], "full_count": x[1], "top_contributors": x[2], } for x in zip( sorted(list(libraries), key=lambda x: library_order.index(x.id)), self._get_library_full_counts(libraries, library_order), self._get_top_contributors_for_library(library_order), ) ] top_contributors = self._get_top_contributors_overall() mailinglist_total = EmailData.objects.all().aggregate(total=Sum("count"))[ "total" ] first_version = Version.objects.order_by("release_date").first() return { "mailinglist_counts": EmailData.objects.with_total_counts().order_by( "-total_count" )[:10], "mailinglist_total": mailinglist_total, "first_version": first_version, "commit_count": commit_count, "top_contributors": top_contributors, "library_data": library_data, "top_libraries": top_libraries, "library_count": self.library_queryset.count(), } def cache_html(self): """Render and cache the html for this report.""" # ensure we have "cleaned_data" if not self.is_valid(): return "" html = render_to_string(self.html_template_name, self.get_stats()) self.cache_set(html) return html def cache_get(self) -> RenderedContent | None: return RenderedContent.objects.filter(cache_key=self.cache_key).first() def cache_clear(self): return RenderedContent.objects.filter(cache_key=self.cache_key).delete() def cache_set(self, content_html): """Cache the html for this report.""" return RenderedContent.objects.update_or_create( cache_key=self.cache_key, defaults={ "content_html": content_html, "content_type": "text/html", }, ) class CreateReportForm(CreateReportFullForm): """Form for creating a report for a specific release.""" html_template_name = "admin/release_report_detail.html" version = ModelChoiceField( queryset=Version.objects.minor_versions().order_by("-version_array") ) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields[ "library_1" ].help_text = "If none are selected, all libraries will be selected." @property def cache_key(self): chosen_libraries = [ self.cleaned_data["library_1"], self.cleaned_data["library_2"], self.cleaned_data["library_3"], self.cleaned_data["library_4"], self.cleaned_data["library_5"], self.cleaned_data["library_6"], self.cleaned_data["library_7"], self.cleaned_data["library_8"], ] lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries) version = self.cleaned_data["version"] return f"release-report-{lib_string}-{version.name}" def _get_top_contributors_for_version(self): return ( CommitAuthor.objects.filter( commit__library_version__version=self.cleaned_data["version"] ) .annotate( commit_count=Count( "commit", filter=Q( commit__library_version__library__in=self.library_queryset ), ) ) .values("name", "avatar_url", "commit_count", "github_profile_url") .order_by("-commit_count")[:10] ) def _get_top_libraries_for_version(self): return ( self.library_queryset.filter( library_version=LibraryVersion.objects.filter( library=OuterRef("id"), version=self.cleaned_data["version"] )[:1], ) .annotate(commit_count=Count("library_version__commit")) .order_by("-commit_count") ) def _get_library_version_counts(self, libraries, library_order): return sorted( list( libraries.filter( library_version=LibraryVersion.objects.filter( library=OuterRef("id"), version=self.cleaned_data["version"] )[:1] ) .annotate(commit_count=Count("library_version__commit")) .values("commit_count", "id") ), key=lambda x: library_order.index(x["id"]), ) def _count_new_contributors(self, libraries, library_order): version = self.cleaned_data["version"] version_lt = list( Version.objects.minor_versions() .filter(version_array__lt=version.cleaned_version_parts_int) .values_list("id", flat=True) ) version_lte = version_lt + [version.id] lt_subquery = LibraryVersion.objects.filter( version__in=version_lt, library=OuterRef("id"), ).values("id") lte_subquery = LibraryVersion.objects.filter( version__in=version_lte, library=OuterRef("id"), ).values("id") return sorted( list( libraries.annotate( authors_before_release_count=Count( "library_version__commit__author", filter=Q(library_version__in=lt_subquery), distinct=True, ), authors_through_release_count=Count( "library_version__commit__author", filter=Q(library_version__in=lte_subquery), distinct=True, ), ) .annotate( count=F("authors_through_release_count") - F("authors_before_release_count") ) .values("id", "count") ), key=lambda x: library_order.index(x["id"]), ) def _count_issues(self, libraries, library_order, version): data = { x["library_id"]: x for x in Issue.objects.count_opened_closed_during_release(version).filter( library_id__in=[x.id for x in libraries] ) } ret = [] for lib_id in library_order: if lib_id in data: ret.append(data[lib_id]) else: ret.append({"opened": 0, "closed": 0, "library_id": lib_id}) return ret def _count_commit_contributors_totals(self, version): """Get a count of contributors for this release, and a count of new contributors. """ version_lt = list( Version.objects.minor_versions() .filter(version_array__lt=version.cleaned_version_parts_int) .values_list("id", flat=True) ) version_lte = version_lt + [version.id] lt_subquery = LibraryVersion.objects.filter( version__in=version_lt, library=OuterRef("id"), ).values("id") lte_subquery = LibraryVersion.objects.filter( version__in=version_lte, library=OuterRef("id"), ).values("id") qs = self.library_queryset.aggregate( this_release_count=Count( "library_version__commit__author", filter=Q(library_version__version=version), distinct=True, ), authors_before_release_count=Count( "library_version__commit__author", filter=Q(library_version__in=lt_subquery), distinct=True, ), authors_through_release_count=Count( "library_version__commit__author", filter=Q(library_version__in=lte_subquery), distinct=True, ), ) new_count = ( qs["authors_through_release_count"] - qs["authors_before_release_count"] ) this_release_count = qs["this_release_count"] return this_release_count, new_count def _get_top_contributors_for_library_version(self, library_order): top_contributors_release = [] for library_id in library_order: top_contributors_release.append( CommitAuthor.objects.filter( commit__library_version=LibraryVersion.objects.get( version=self.cleaned_data["version"], library_id=library_id ) ) .annotate(commit_count=Count("commit")) .values( "name", "avatar_url", "github_profile_url", "commit_count", "commit__library_version__library_id", ) .order_by("-commit_count")[:10] ) return top_contributors_release def _get_mail_content(self, version): prior_version = ( Version.objects.minor_versions() .filter(version_array__lt=version.cleaned_version_parts_int) .order_by("-release_date") .first() ) if not prior_version or not settings.HYPERKITTY_DATABASE_NAME: return [] conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL) with conn.cursor(name="fetch-mail-content") as cursor: cursor.execute( """ SELECT content FROM hyperkitty_email WHERE date >= %(start)s AND date < %(end)s; """, {"start": prior_version.release_date, "end": version.release_date}, ) for [content] in cursor: yield content def _generate_hyperkitty_word_cloud(self, version): """Generates a wordcloud png and returns it as a base64 string.""" wc = WordCloud( width=1400, height=700, stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set, font_path=settings.BASE_DIR / "static" / "font" / "notosans_mono.woff", ) image_bytes = io.BytesIO() frequencies = {} for content in self._get_mail_content(version): for key, val in wc.process_text(content).items(): if key not in frequencies: frequencies[key] = 0 frequencies[key] += val if not frequencies: return wc.generate_from_frequencies(frequencies) plt.figure(figsize=(14, 7)) plt.imshow(wc, interpolation="bilinear") plt.axis("off") image_bytes = io.BytesIO() plt.savefig( image_bytes, format="png", dpi=100, bbox_inches="tight", pad_inches=0, ) image_bytes.seek(0) return base64.b64encode(image_bytes.read()).decode() def _count_mailinglist_contributors(self, version): version_lt = list( Version.objects.minor_versions() .filter(version_array__lt=version.cleaned_version_parts_int) .values_list("id", flat=True) ) version_lte = version_lt + [version.id] current = ( EmailData.objects.filter(version__in=version_lte) .distinct("author_id") .count() ) prior = ( EmailData.objects.filter(version__in=version_lt) .distinct("author_id") .count() ) release = EmailData.objects.filter(version=version).count() return release, current - prior def _get_library_versions(self, library_order, version): return sorted( list( LibraryVersion.objects.filter( version=version, library_id__in=library_order ) ), key=lambda x: library_order.index(x.library_id), ) def _get_git_graph_data(self, prior_version: Version | None, version: Version): """Fetch commit count data for a release and return an instance of Graph. Returns data in a format to easily create a github style green box commit graph. """ if prior_version is None: return None if prior_version.release_date is None or version.release_date is None: return None @dataclass class Day: date: date count: int color: str = "" @dataclass class Week: days: list[Day] = field(default_factory=list) @cached_property def max(self): """The max number of commits this week.""" return max(x.count for x in self.days) @dataclass class Graph: weeks: list[Week] = field(default_factory=list) colors: list[str] = field( default_factory=lambda: [ "#E8F5E9", "#C8E6C9", "#A5D6A7", "#81C784", "#66BB6A", "#4CAF50", "#43A047", "#388E3C", "#2E7D32", "#1B5E20", ], ) @cached_property def max(self): """The max number of commits in all weeks.""" return max(x.max for x in self.weeks) def append_day(self, day: Day): """Append a day into the last week of self.weeks. - Automatically create a new week if there are already 7 days in the last week. """ if len(self.weeks) == 0 or len(self.weeks[-1].days) == 7: self.weeks.append(Week()) self.weeks[-1].days.append(day) def apply_colors(self): """Iterate through each day and apply a color. - The color is selected based on the number of commits made on that day, relative to the highest number of commits in all days in Graph.weeks.days. """ high = self.max for week in self.weeks: for day in week.days: decimal = day.count / high if decimal == 1: day.color = self.colors[-1] else: idx = int(decimal * len(self.colors)) day.color = self.colors[idx] count_query = ( Commit.objects.filter(library_version__version=version) .values("committed_at__date") .annotate(count=Count("id")) ) counts_by_date = {x["committed_at__date"]: x["count"] for x in count_query} graph = Graph() # The start date is the release date of the previous version # The end date is one day before the release date of the current version start: date = prior_version.release_date end: date = version.release_date - timedelta(days=1) # if the release started on a Thursday, we want to add Sun -> Wed to the data # with empty counts, even if they aren't part of the release. for i in range(start.weekday(), 0, -1): day = Day(date=start - timedelta(days=i), count=0) graph.append_day(day) current_date = start while current_date <= end: day = Day(date=current_date, count=counts_by_date.get(current_date, 0)) graph.append_day(day) current_date = current_date + timedelta(days=1) graph.apply_colors() return graph def get_stats(self): version = self.cleaned_data["version"] downloads = { k: list(v) for k, v in groupby( version.downloads.all().order_by("operating_system"), key=attrgetter("operating_system"), ) } prior_version = ( Version.objects.minor_versions() .filter(version_array__lt=version.cleaned_version_parts_int) .order_by("-version_array") .first() ) commit_count = Commit.objects.filter( library_version__version__name__lte=version.name, library_version__library__in=self.library_queryset, ).count() version_commit_count = Commit.objects.filter( library_version__version=version, library_version__library__in=self.library_queryset, ).count() top_libraries_for_version = self._get_top_libraries_for_version() library_order = self._get_library_order(top_libraries_for_version) libraries = Library.objects.filter(id__in=library_order) library_names = ( LibraryVersion.objects.filter( version=version, library__in=self.library_queryset, ) .annotate(name=F("library__name")) .order_by("name") .values_list("name", flat=True) ) library_data = [ { "library": a, "full_count": b, "version_count": c, "top_contributors_release": d, "new_contributors_count": e, "issues": f, "library_version": g, } for a, b, c, d, e, f, g in zip( sorted(list(libraries), key=lambda x: library_order.index(x.id)), self._get_library_full_counts(libraries, library_order), self._get_library_version_counts(libraries, library_order), self._get_top_contributors_for_library_version(library_order), self._count_new_contributors(libraries, library_order), self._count_issues(libraries, library_order, version), self._get_library_versions(library_order, version), ) ] library_data = [ x for x in library_data if x["version_count"]["commit_count"] > 0 ] top_contributors = self._get_top_contributors_for_version() # total messages sent during this release (version) total_mailinglist_count = EmailData.objects.filter(version=version).aggregate( total=Sum("count") )["total"] mailinglist_counts = ( EmailData.objects.filter(version=version) .with_total_counts() .order_by("-total_count")[:10] ) ( mailinglist_contributor_release_count, mailinglist_contributor_new_count, ) = self._count_mailinglist_contributors(version) ( commit_contributors_release_count, commit_contributors_new_count, ) = self._count_commit_contributors_totals(version) library_count = LibraryVersion.objects.filter( version=version, library__in=self.library_queryset, ).count() if prior_version: library_count_prior = LibraryVersion.objects.filter( version=prior_version, library__in=self.library_queryset, ).count() else: library_count_prior = 0 added_library_count = max(0, library_count - library_count_prior) removed_library_count = max(0, library_count_prior - library_count) lines_added = LibraryVersion.objects.filter( version=version, library__in=self.library_queryset, ).aggregate(lines=Sum("insertions"))["lines"] lines_removed = LibraryVersion.objects.filter( version=version, library__in=self.library_queryset, ).aggregate(lines=Sum("deletions"))["lines"] return { "lines_added": lines_added, "lines_removed": lines_removed, "wordcloud_base64": self._generate_hyperkitty_word_cloud(version), "version": version, "prior_version": prior_version, "opened_issues_count": Issue.objects.filter( library__in=self.library_queryset ) .opened_during_release(version) .count(), "closed_issues_count": Issue.objects.filter( library__in=self.library_queryset ) .closed_during_release(version) .count(), "mailinglist_counts": mailinglist_counts, "mailinglist_total": total_mailinglist_count or 0, "mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501 "mailinglist_contributor_new_count": mailinglist_contributor_new_count, "commit_contributors_release_count": commit_contributors_release_count, "commit_contributors_new_count": commit_contributors_new_count, "commit_count": commit_count, "version_commit_count": version_commit_count, "top_contributors_release_overall": top_contributors, "library_data": library_data, "top_libraries_for_version": top_libraries_for_version, "library_count": library_count, "library_count_prior": library_count_prior, "library_names": library_names, "added_library_count": added_library_count, "removed_library_count": removed_library_count, "downloads": downloads, "contribution_box_graph": self._get_git_graph_data(prior_version, version), }