from functools import cached_property
from itertools import groupby, chain
from operator import attrgetter
from dataclasses import dataclass, field
from datetime import date, timedelta


from django.template.loader import render_to_string
from django.db.models import F, Q, Count, OuterRef, Sum, When, Value, Case
from django.forms import Form, ModelChoiceField, ModelForm, BooleanField

from core.models import RenderedContent
from reports.generation import (
    generate_wordcloud,
    get_mailing_list_post_stats,
    get_new_subscribers_stats,
)
from slack.models import Channel, SlackActivityBucket, SlackUser
from versions.models import Version
from .models import (
    Commit,
    CommitAuthor,
    Issue,
    Library,
    LibraryVersion,
)
from libraries.constants import SUB_LIBRARIES
from mailing_list.models import EmailData
from .utils import batched


class LibraryForm(ModelForm):
    class Meta:
        model = Library
        fields = ["categories"]


class CreateReportFullForm(Form):
    """Form for creating a report over all releases."""

    html_template_name = "admin/library_report_full_detail.html"

    library_queryset = Library.objects.exclude(key__in=SUB_LIBRARIES).order_by("name")
    library_1 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
        help_text="If none are selected, the top 5 will be auto-selected.",
    )
    library_2 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_3 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_4 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_5 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_6 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_7 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    library_8 = ModelChoiceField(
        queryset=library_queryset,
        required=False,
    )
    no_cache = BooleanField(
        required=False,
        initial=False,
        help_text="Force the page to be regenerated, do not use cache.",
    )

    @property
    def cache_key(self):
        chosen_libraries = [
            self.cleaned_data["library_1"],
            self.cleaned_data["library_2"],
            self.cleaned_data["library_3"],
            self.cleaned_data["library_4"],
            self.cleaned_data["library_5"],
            self.cleaned_data["library_6"],
            self.cleaned_data["library_7"],
            self.cleaned_data["library_8"],
        ]
        lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries)
        return f"full-report-{lib_string}"

    def _get_top_libraries(self):
        return self.library_queryset.annotate(
            commit_count=Count("library_version__commit")
        ).order_by("-commit_count")[:5]

    def _get_library_order(self, top_libraries):
        library_order = [
            x.id
            for x in [
                self.cleaned_data["library_1"],
                self.cleaned_data["library_2"],
                self.cleaned_data["library_3"],
                self.cleaned_data["library_4"],
                self.cleaned_data["library_5"],
                self.cleaned_data["library_6"],
                self.cleaned_data["library_7"],
                self.cleaned_data["library_8"],
            ]
            if x is not None
        ]
        if not library_order:
            library_order = [x.id for x in top_libraries]
        return library_order

    def _get_library_full_counts(self, libraries, library_order):
        return sorted(
            list(
                libraries.annotate(
                    commit_count=Count("library_version__commit")
                ).values("commit_count", "id")
            ),
            key=lambda x: library_order.index(x["id"]),
        )

    def _get_top_contributors_overall(self):
        return (
            CommitAuthor.objects.all()
            .annotate(
                commit_count=Count(
                    "commit",
                    filter=Q(
                        commit__library_version__library__in=self.library_queryset
                    ),
                )
            )
            .values("name", "avatar_url", "commit_count", "github_profile_url")
            .order_by("-commit_count")[:10]
        )

    def _get_top_contributors_for_library(self, library_order):
        top_contributors_library = []
        for library_id in library_order:
            top_contributors_library.append(
                CommitAuthor.objects.filter(
                    commit__library_version__library_id=library_id
                )
                .annotate(commit_count=Count("commit"))
                .order_by("-commit_count")[:10]
            )
        return top_contributors_library

    def get_stats(self):
        commit_count = Commit.objects.filter(
            library_version__library__in=self.library_queryset
        ).count()

        top_libraries = self._get_top_libraries()
        library_order = self._get_library_order(top_libraries)
        libraries = Library.objects.filter(id__in=library_order)
        library_data = [
            {
                "library": x[0],
                "full_count": x[1],
                "top_contributors": x[2],
            }
            for x in zip(
                sorted(list(libraries), key=lambda x: library_order.index(x.id)),
                self._get_library_full_counts(libraries, library_order),
                self._get_top_contributors_for_library(library_order),
            )
        ]
        top_contributors = self._get_top_contributors_overall()
        mailinglist_total = EmailData.objects.all().aggregate(total=Sum("count"))[
            "total"
        ]
        first_version = Version.objects.order_by("release_date").first()
        return {
            "mailinglist_counts": EmailData.objects.with_total_counts().order_by(
                "-total_count"
            )[:10],
            "mailinglist_total": mailinglist_total,
            "first_version": first_version,
            "commit_count": commit_count,
            "top_contributors": top_contributors,
            "library_data": library_data,
            "top_libraries": top_libraries,
            "library_count": self.library_queryset.count(),
        }

    def cache_html(self):
        """Render and cache the html for this report."""
        # ensure we have "cleaned_data"
        if not self.is_valid():
            return ""
        try:
            html = render_to_string(self.html_template_name, self.get_stats())
        except FileNotFoundError as e:
            html = (
                f"An error occurred generating the report: {e}. To see the image "
                f"which is broken copy the error and run the report again. This "
                f"error isn't shown on subsequent runs."
            )
        self.cache_set(html)
        return html

    def cache_get(self) -> RenderedContent | None:
        return RenderedContent.objects.filter(cache_key=self.cache_key).first()

    def cache_clear(self):
        return RenderedContent.objects.filter(cache_key=self.cache_key).delete()

    def cache_set(self, content_html):
        """Cache the html for this report."""
        return RenderedContent.objects.update_or_create(
            cache_key=self.cache_key,
            defaults={
                "content_html": content_html,
                "content_type": "text/html",
            },
        )


class CreateReportForm(CreateReportFullForm):
    """Form for creating a report for a specific release."""

    html_template_name = "admin/release_report_detail.html"

    version = ModelChoiceField(
        queryset=Version.objects.minor_versions().order_by("-version_array")
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.fields["library_1"].help_text = (
            "If none are selected, all libraries will be selected."
        )

    @property
    def cache_key(self):
        chosen_libraries = [
            self.cleaned_data["library_1"],
            self.cleaned_data["library_2"],
            self.cleaned_data["library_3"],
            self.cleaned_data["library_4"],
            self.cleaned_data["library_5"],
            self.cleaned_data["library_6"],
            self.cleaned_data["library_7"],
            self.cleaned_data["library_8"],
        ]
        lib_string = ",".join(str(x.id) if x else "" for x in chosen_libraries)
        version = self.cleaned_data["version"]
        return f"release-report-{lib_string}-{version.name}"

    def _get_top_contributors_for_version(self):
        return (
            CommitAuthor.objects.filter(
                commit__library_version__version=self.cleaned_data["version"]
            )
            .annotate(
                commit_count=Count(
                    "commit",
                    filter=Q(
                        commit__library_version__library__in=self.library_queryset
                    ),
                )
            )
            .order_by("-commit_count")[:10]
        )

    def _get_library_queryset_by_version(
        self, version: Version, annotate_commit_count=False
    ):
        qs = self.library_queryset.filter(
            library_version=LibraryVersion.objects.filter(
                library=OuterRef("id"), version=version
            )[:1],
        )
        if annotate_commit_count:
            qs = qs.annotate(commit_count=Count("library_version__commit"))
        return qs

    def _get_top_libraries_for_version(self):
        library_qs = self._get_library_queryset_by_version(
            self.cleaned_data["version"], annotate_commit_count=True
        )
        return library_qs.order_by("-commit_count")

    def _get_libraries_by_name(self):
        library_qs = self._get_library_queryset_by_version(
            self.cleaned_data["version"], annotate_commit_count=True
        )
        return library_qs.order_by("name")

    def _get_libraries_by_quality(self):
        # returns "great", "good", and "standard" libraries in that order
        library_qs = self._get_library_queryset_by_version(self.cleaned_data["version"])
        return list(
            chain(
                library_qs.filter(graphic__isnull=False),
                library_qs.filter(graphic__isnull=True, is_good=True),
                library_qs.filter(graphic__isnull=True, is_good=False),
            )
        )

    def _get_library_version_counts(self, libraries, library_order):
        library_qs = self._get_library_queryset_by_version(
            self.cleaned_data["version"], annotate_commit_count=True
        )
        return sorted(
            list(library_qs.values("commit_count", "id")),
            key=lambda x: library_order.index(x["id"]),
        )

    def _global_new_contributors(self, library_version):
        version = self.cleaned_data["version"]
        version_lt = list(
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .order_by("id")
            .values_list("id", flat=True)
        )

        prior_version_author_ids = (
            CommitAuthor.objects.filter(commit__library_version__version__in=version_lt)
            .distinct()
            .values_list("id", flat=True)
        )

        version_author_ids = (
            CommitAuthor.objects.filter(
                commit__library_version__version__in=version_lt + [version.id]
            )
            .distinct()
            .values_list("id", flat=True)
        )

        return set(version_author_ids) - set(prior_version_author_ids)

    def _count_new_contributors(self, libraries, library_order):
        version = self.cleaned_data["version"]
        version_lt = list(
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .values_list("id", flat=True)
        )
        version_lte = version_lt + [version.id]
        lt_subquery = LibraryVersion.objects.filter(
            version__in=version_lt,
            library=OuterRef("id"),
        ).values("id")
        lte_subquery = LibraryVersion.objects.filter(
            version__in=version_lte,
            library=OuterRef("id"),
        ).values("id")
        return sorted(
            list(
                libraries.annotate(
                    authors_before_release_count=Count(
                        "library_version__commit__author",
                        filter=Q(library_version__in=lt_subquery),
                        distinct=True,
                    ),
                    authors_through_release_count=Count(
                        "library_version__commit__author",
                        filter=Q(library_version__in=lte_subquery),
                        distinct=True,
                    ),
                )
                .annotate(
                    count=F("authors_through_release_count")
                    - F("authors_before_release_count")
                )
                .values("id", "count")
            ),
            key=lambda x: library_order.index(x["id"]),
        )

    def _count_issues(self, libraries, library_order, version):
        data = {
            x["library_id"]: x
            for x in Issue.objects.count_opened_closed_during_release(version).filter(
                library_id__in=[x.id for x in libraries]
            )
        }
        ret = []
        for lib_id in library_order:
            if lib_id in data:
                ret.append(data[lib_id])
            else:
                ret.append({"opened": 0, "closed": 0, "library_id": lib_id})
        return ret

    def _count_commit_contributors_totals(self, version):
        """Get a count of contributors for this release, and a count of
        new contributors.

        """
        version_lt = list(
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .values_list("id", flat=True)
        )
        version_lte = version_lt + [version.id]
        lt_subquery = LibraryVersion.objects.filter(
            version__in=version_lt,
            library=OuterRef("id"),
        ).values("id")
        lte_subquery = LibraryVersion.objects.filter(
            version__in=version_lte,
            library=OuterRef("id"),
        ).values("id")
        qs = self.library_queryset.aggregate(
            this_release_count=Count(
                "library_version__commit__author",
                filter=Q(library_version__version=version),
                distinct=True,
            ),
            authors_before_release_count=Count(
                "library_version__commit__author",
                filter=Q(library_version__in=lt_subquery),
                distinct=True,
            ),
            authors_through_release_count=Count(
                "library_version__commit__author",
                filter=Q(library_version__in=lte_subquery),
                distinct=True,
            ),
        )
        new_count = (
            qs["authors_through_release_count"] - qs["authors_before_release_count"]
        )
        this_release_count = qs["this_release_count"]
        return this_release_count, new_count

    def _get_top_contributors_for_library_version(self, library_order):
        top_contributors_release = []
        for library_id in library_order:
            top_contributors_release.append(
                CommitAuthor.objects.filter(
                    commit__library_version=LibraryVersion.objects.get(
                        version=self.cleaned_data["version"], library_id=library_id
                    )
                )
                .annotate(commit_count=Count("commit"))
                .order_by("-commit_count")[:10]
            )
        return top_contributors_release

    def _count_mailinglist_contributors(self, version):
        version_lt = list(
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .values_list("id", flat=True)
        )
        version_lte = version_lt + [version.id]
        current = (
            EmailData.objects.filter(version__in=version_lte)
            .distinct("author_id")
            .count()
        )
        prior = (
            EmailData.objects.filter(version__in=version_lt)
            .distinct("author_id")
            .count()
        )
        release = EmailData.objects.filter(version=version).count()
        return release, current - prior

    def _get_library_versions(self, library_order, version):
        return sorted(
            list(
                LibraryVersion.objects.filter(
                    version=version, library_id__in=library_order
                )
            ),
            key=lambda x: library_order.index(x.library_id),
        )

    def _get_git_graph_data(self, prior_version: Version | None, version: Version):
        """Fetch commit count data for a release and return an instance of Graph.

        Returns data in a format to easily create a github style green box commit graph.

        """
        if prior_version is None:
            return None
        if prior_version.release_date is None or version.release_date is None:
            return None

        @dataclass
        class Day:
            date: date
            count: int
            color: str = ""

        @dataclass
        class Week:
            days: list[Day] = field(default_factory=list)

            @cached_property
            def max(self):
                """The max number of commits this week."""
                return max(x.count for x in self.days)

        @dataclass
        class Graph:
            weeks: list[Week] = field(default_factory=list)
            colors: list[str] = field(
                default_factory=lambda: [
                    "#E8F5E9",
                    "#C8E6C9",
                    "#A5D6A7",
                    "#81C784",
                    "#66BB6A",
                    "#4CAF50",
                    "#43A047",
                    "#388E3C",
                    "#2E7D32",
                    "#1B5E20",
                ],
            )

            @cached_property
            def graph_start(self):
                return start.strftime("%B '%y")

            @cached_property
            def graph_end(self):
                return end.strftime("%B '%y")

            @cached_property
            def max(self):
                """The max number of commits in all weeks."""
                return max(x.max for x in self.weeks)

            def append_day(self, day: Day):
                """Append a day into the last week of self.weeks.

                - Automatically create a new week if there are already 7 days in the
                last week.
                """
                if len(self.weeks) == 0 or len(self.weeks[-1].days) == 7:
                    self.weeks.append(Week())
                self.weeks[-1].days.append(day)

            def apply_colors(self):
                """Iterate through each day and apply a color.

                - The color is selected based on the number of commits made on
                that day, relative to the highest number of commits in all days in
                Graph.weeks.days.

                """
                if not (high := self.max):
                    # No commits this release
                    # TODO: we may want a more elegant solution
                    #  than just not graphing this library
                    return
                for week in self.weeks:
                    for day in week.days:
                        decimal = day.count / high
                        if decimal == 1:
                            day.color = self.colors[-1]
                        else:
                            idx = int(decimal * len(self.colors))
                            day.color = self.colors[idx]

        count_query = (
            Commit.objects.filter(library_version__version=version)
            .values("committed_at__date")
            .annotate(count=Count("id"))
        )
        counts_by_date = {x["committed_at__date"]: x["count"] for x in count_query}

        graph = Graph()
        # The start date is the release date of the previous version
        # The end date is one day before the release date of the current version
        start: date = prior_version.release_date
        end: date = version.release_date - timedelta(days=1)

        # if the release started on a Thursday, we want to add Sun -> Wed to the data
        # with empty counts, even if they aren't part of the release.
        for i in range(start.weekday(), 0, -1):
            day = Day(date=start - timedelta(days=i), count=0)
            graph.append_day(day)

        current_date = start
        while current_date <= end:
            day = Day(date=current_date, count=counts_by_date.get(current_date, 0))
            graph.append_day(day)
            current_date = current_date + timedelta(days=1)
        graph.apply_colors()
        return graph

    def _get_slack_stats(self, prior_version, version):
        """Returns all slack related stats.

        Only returns channels with activity.
        """
        stats = []
        for channel in Channel.objects.filter(name__istartswith="boost"):
            channel_stat = self._get_slack_stats_for_channels(
                prior_version, version, channels=[channel]
            )
            channel_stat["channel"] = channel
            if channel_stat["user_count"] > 0:
                stats.append(channel_stat)
        stats.sort(key=lambda x: -(x["total"] or 0))  # Convert None to 0
        return stats

    def _get_slack_stats_for_channels(
        self, prior_version, version, channels: list[Channel] | None = None
    ):
        """Get slack stats for specific channels, or all channels."""
        start = prior_version.release_date
        end = version.release_date - timedelta(days=1)
        # count of all messages in the date range
        q = Q(day__range=[start, end])
        if channels:
            q &= Q(channel__in=channels)
        total = SlackActivityBucket.objects.filter(q).aggregate(total=Sum("count"))[
            "total"
        ]
        # message counts per user in the date range
        q = Q(slackactivitybucket__day__range=[start, end])
        if channels:
            q &= Q(slackactivitybucket__channel__in=channels)
        per_user = (
            SlackUser.objects.annotate(
                total=Sum(
                    "slackactivitybucket__count",
                    filter=q,
                )
            )
            .filter(total__gt=0)
            .order_by("-total")
        )
        q = Q()
        if channels:
            q &= Q(channel__in=channels)
        distinct_users = (
            SlackActivityBucket.objects.filter(q)
            .order_by("user_id")
            .distinct("user_id")
        )
        new_user_count = (
            distinct_users.filter(day__lte=end).count()
            - distinct_users.filter(day__lt=start).count()
        )
        return {
            "users": per_user[:10],
            "user_count": per_user.count(),
            "total": total,
            "new_user_count": new_user_count,
        }

    def _get_dependency_data(self, library_order, version):
        diffs_by_id = {
            x["library_id"]: x for x in version.get_dependency_diffs().values()
        }
        diffs = []
        for lib_id in library_order:
            diffs.append(diffs_by_id.get(lib_id, {}))
        return diffs

    def get_stats(self):
        version = self.cleaned_data["version"]

        downloads = {
            k: list(v)
            for k, v in groupby(
                version.downloads.all().order_by("operating_system"),
                key=attrgetter("operating_system"),
            )
        }
        prior_version = (
            Version.objects.minor_versions()
            .filter(version_array__lt=version.cleaned_version_parts_int)
            .order_by("-version_array")
            .first()
        )

        commit_count = Commit.objects.filter(
            library_version__version__name__lte=version.name,
            library_version__library__in=self.library_queryset,
        ).count()
        version_commit_count = Commit.objects.filter(
            library_version__version=version,
            library_version__library__in=self.library_queryset,
        ).count()

        top_libraries_for_version = self._get_top_libraries_for_version()
        top_libraries_by_name = self._get_libraries_by_name()
        library_order = self._get_library_order(top_libraries_by_name)
        libraries = Library.objects.filter(id__in=library_order).order_by(
            Case(
                *[When(id=pk, then=Value(pos)) for pos, pk in enumerate(library_order)]
            )
        )

        library_data = [
            {
                "library": item[0],
                "full_count": item[1],
                "version_count": item[2],
                "top_contributors_release": item[3],
                "new_contributors_count": item[4],
                "issues": item[5],
                "library_version": item[6],
                "deps": item[7],
            }
            for item in zip(
                libraries,
                self._get_library_full_counts(libraries, library_order),
                self._get_library_version_counts(libraries, library_order),
                self._get_top_contributors_for_library_version(library_order),
                self._count_new_contributors(libraries, library_order),
                self._count_issues(libraries, library_order, version),
                self._get_library_versions(library_order, version),
                self._get_dependency_data(library_order, version),
            )
        ]
        library_data = [
            x for x in library_data if x["version_count"]["commit_count"] > 0
        ]
        top_contributors = self._get_top_contributors_for_version()
        # total messages sent during this release (version)
        total_mailinglist_count = EmailData.objects.filter(version=version).aggregate(
            total=Sum("count")
        )["total"]
        mailinglist_counts = (
            EmailData.objects.filter(version=version)
            .with_total_counts()
            .order_by("-total_count")[:10]
        )
        (
            mailinglist_contributor_release_count,
            mailinglist_contributor_new_count,
        ) = self._count_mailinglist_contributors(version)
        (
            commit_contributors_release_count,
            commit_contributors_new_count,
        ) = self._count_commit_contributors_totals(version)
        library_count = LibraryVersion.objects.filter(
            version=version,
            library__in=self.library_queryset,
        ).count()
        if prior_version:
            library_count_prior = LibraryVersion.objects.filter(
                version=prior_version,
                library__in=self.library_queryset,
            ).count()
        else:
            library_count_prior = 0

        added_library_count = max(0, library_count - library_count_prior)
        removed_library_count = max(0, library_count_prior - library_count)
        lines_added = LibraryVersion.objects.filter(
            version=version,
            library__in=self.library_queryset,
        ).aggregate(lines=Sum("insertions"))["lines"]
        lines_removed = LibraryVersion.objects.filter(
            version=version,
            library__in=self.library_queryset,
        ).aggregate(lines=Sum("deletions"))["lines"]
        # we want 2 channels per pdf page, use batched to get groups of 2
        slack_stats = batched(self._get_slack_stats(prior_version, version), 2)
        slack_channels = batched(
            Channel.objects.filter(name__istartswith="boost").order_by("name"), 10
        )
        committee_members = version.financial_committee_members.all()
        mailinglist_post_stats = get_mailing_list_post_stats(
            prior_version.release_date, version.release_date
        )
        new_subscribers_stats = get_new_subscribers_stats(
            prior_version.release_date, version.release_date
        )
        library_index_library_data = []
        for library in self._get_libraries_by_quality():
            library_index_library_data.append(
                (
                    library,
                    library in [lib["library"] for lib in library_data],
                )
            )
        wordcloud_base64, wordcloud_top_words = generate_wordcloud(version)

        return {
            "committee_members": committee_members,
            "lines_added": lines_added,
            "lines_removed": lines_removed,
            "wordcloud_base64": wordcloud_base64,
            "wordcloud_frequencies": wordcloud_top_words,
            "version": version,
            "prior_version": prior_version,
            "opened_issues_count": Issue.objects.filter(
                library__in=self.library_queryset
            )
            .opened_during_release(version)
            .count(),
            "closed_issues_count": Issue.objects.filter(
                library__in=self.library_queryset
            )
            .closed_during_release(version)
            .count(),
            "mailinglist_counts": mailinglist_counts,
            "mailinglist_total": total_mailinglist_count or 0,
            "mailinglist_contributor_release_count": mailinglist_contributor_release_count,  # noqa: E501
            "mailinglist_contributor_new_count": mailinglist_contributor_new_count,
            "mailinglist_post_stats": mailinglist_post_stats,
            "mailinglist_new_subscribers_stats": new_subscribers_stats,
            "commit_contributors_release_count": commit_contributors_release_count,
            "commit_contributors_new_count": commit_contributors_new_count,
            "global_contributors_new_count": len(
                self._global_new_contributors(version)
            ),
            "commit_count": commit_count,
            "version_commit_count": version_commit_count,
            "top_contributors_release_overall": top_contributors,
            "library_data": library_data,
            "top_libraries_for_version": top_libraries_for_version,
            "library_count": library_count,
            "library_count_prior": library_count_prior,
            "library_index_libraries": library_index_library_data,
            "added_library_count": added_library_count,
            "removed_library_count": removed_library_count,
            "downloads": downloads,
            "contribution_box_graph": self._get_git_graph_data(prior_version, version),
            "slack_channels": slack_channels,
            "slack": slack_stats,
        }