Website traffic stats gathering (#1679)

This commit is contained in:
Greg Kaleka
2025-03-23 16:11:21 -04:00
committed by GitHub
parent 4d5bf61dc6
commit 8b43040399
16 changed files with 474 additions and 166 deletions

0
reports/__init__.py Normal file
View File

32
reports/admin.py Normal file
View File

@@ -0,0 +1,32 @@
from django.contrib import admin
from reports.models import WebsiteStatReport, WebsiteStatItem
class StatInline(admin.StackedInline):
    """Display-only inline showing the value of each stat item on its report."""

    model = WebsiteStatItem
    # Only the value is shown, and it is read-only.
    readonly_fields = ("value",)
    fields = readonly_fields
    # No blank extra forms, and existing rows cannot be deleted from here.
    extra = 0
    can_delete = False
@admin.register(WebsiteStatReport)
class WebsiteStatReportAdmin(admin.ModelAdmin):
    """Admin for stat reports; the changelist shows headline traffic numbers."""

    inlines = (StatInline,)
    list_display = ("version", "pageviews", "unique_visitors", "period")
    ordering = ("-version",)

    def get_queryset(self, request):
        """Prefetch stat items so the computed columns do not query per row."""
        return super().get_queryset(request).prefetch_related("stats")

    @staticmethod
    def _format_stat(obj, code_name):
        """Return the stat with ``code_name`` as a comma-grouped integer string.

        Returns ``None`` when the report has no such stat (for example before
        it has been populated), so the admin renders its empty-value
        placeholder instead of raising ``DoesNotExist`` for the whole page.
        """
        # Iterate ``.all()`` rather than calling ``.get()`` so the prefetched
        # rows are reused; (report, code_name) is unique, so the first match
        # is the only match.
        for stat in obj.stats.all():
            if stat.code_name == code_name:
                return f"{int(stat.value):,}"
        return None

    def pageviews(self, obj):
        """Changelist column: total pageviews for the report."""
        return self._format_stat(obj, "pageviews")

    def unique_visitors(self, obj):
        """Changelist column: unique visitors for the report."""
        return self._format_stat(obj, "visitors")
@admin.register(WebsiteStatItem)
class WebsiteStatItemAdmin(admin.ModelAdmin):
    """Stock ModelAdmin for stat items; nothing is customised."""

6
reports/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class ReportsConfig(AppConfig):
    """Django application configuration for the ``reports`` app."""

    name = "reports"
    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"

6
reports/constants.py Normal file
View File

@@ -0,0 +1,6 @@
# Font file name used when rendering word clouds.
WORDCLOUD_FONT = "notosans_mono.woff"

# Site whose traffic statistics are requested from the analytics service.
WEB_ANALYTICS_DOMAIN = "preview.boost.org"

# URL template for the top-stats endpoint. The two remaining ``{}`` slots are
# filled later via ``str.format(start, end)`` with date objects, which the
# ``%Y-%m-%d`` format spec renders as ISO dates.
WEB_ANALYTICS_API_URL = (
    "https://plausible.io/api/stats/" + WEB_ANALYTICS_DOMAIN + "/top-stats/"
    "?period=custom&from={:%Y-%m-%d}&to={:%Y-%m-%d}"
)

10
reports/forms.py Normal file
View File

@@ -0,0 +1,10 @@
from django import forms
from versions.models import Version
class ImportWebAnalyticsForm(forms.Form):
    """Form with a single dropdown for choosing a version."""

    version = forms.ModelChoiceField(
        queryset=Version.objects.get_dropdown_versions(),
        widget=forms.Select(attrs={"class": "dropdown !mb-0 h-[38px]"}),
    )

112
reports/generation.py Normal file
View File

@@ -0,0 +1,112 @@
import base64
import io
import random
import psycopg2
from django.conf import settings
from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from core.models import SiteSettings
from libraries.models import WordcloudMergeWord # TODO: move model to this app
from reports.constants import WORDCLOUD_FONT
from versions.models import Version
def generate_wordcloud(version: Version) -> tuple[str | None, list]:
    """Generate a word cloud PNG from the mail content gathered for ``version``.

    Word counts are accumulated case-insensitively over every message yielded
    by ``get_mail_content``, one-character tokens are dropped, and merge words
    from ``WordcloudMergeWord`` are folded together before rendering.

    Returns:
        Tuple of (base64_encoded_png_string, wordcloud_top_words). The first
        element is ``None`` and the list is empty when no words were found.
        ``wordcloud_top_words`` holds the 200 most frequent words, sorted
        alphabetically.
    """
    wc = WordCloud(
        mode="RGBA",
        background_color=None,
        width=1400,
        height=700,
        stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set,
        font_path=settings.STATIC_ROOT / "font" / WORDCLOUD_FONT,
    )

    word_frequencies = {}
    for content in get_mail_content(version):
        for key, val in wc.process_text(content).items():
            if len(key) < 2:
                continue
            key_lower = key.lower()
            word_frequencies[key_lower] = word_frequencies.get(key_lower, 0) + val
    if not word_frequencies:
        return None, []

    word_frequencies = boost_normalize_words(
        word_frequencies,
        {x.from_word: x.to_word for x in WordcloudMergeWord.objects.all()},
    )

    # First sort by count (descending), then sort the top 200 alphabetically.
    word_frequencies = dict(
        sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True)
    )
    wordcloud_top_words = sorted(list(word_frequencies)[:200])

    wc.generate_from_frequencies(word_frequencies)

    figure = plt.figure(figsize=(14, 7), facecolor=None)
    try:
        plt.imshow(
            wc.recolor(color_func=grey_color_func, random_state=3),
            interpolation="bilinear",
        )
        plt.axis("off")
        image_bytes = io.BytesIO()
        plt.savefig(
            image_bytes,
            format="png",
            dpi=100,
            bbox_inches="tight",
            pad_inches=0,
            transparent=True,
        )
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # without this each call would leak a figure in a long-lived process.
        plt.close(figure)

    return base64.b64encode(image_bytes.getvalue()).decode(), wordcloud_top_words
def boost_normalize_words(frequencies, word_map):
    """Fold the counts of merged words into their replacement words.

    Args:
        frequencies: Mapping of word -> count. It is modified in place.
        word_map: Mapping of from_word -> to_word.

    Returns:
        The same ``frequencies`` dict, with each ``from_word`` that had a
        non-zero count removed and its count added to ``to_word``.
    """
    for from_word, to_word in word_map.items():
        if from_word == to_word:
            # A word mapped onto itself must be left alone; merging it would
            # double its count and then delete the entry altogether.
            continue
        from_count = frequencies.get(from_word, 0)
        if not from_count:
            continue
        frequencies[to_word] = frequencies.get(to_word, 0) + from_count
        del frequencies[from_word]
    return frequencies
def grey_color_func(*args, **kwargs):
    """Return a random grey as an ``hsl(...)`` string with 10-80% lightness.

    Intended as a ``color_func`` for ``WordCloud.recolor``. When the caller
    supplies a ``random_state`` keyword (an object with a ``randint`` method,
    such as ``random.Random``), it is used so that a seeded recolor is
    reproducible; otherwise the module-level ``random`` generator is used.
    All other arguments are ignored.
    """
    rng = kwargs.get("random_state")
    if rng is None:
        rng = random
    return "hsl(0, 0%%, %d%%)" % rng.randint(10, 80)
def get_mail_content(version: Version):
    """Yield the content of each archived email sent before ``version``'s release.

    The window starts at the release date of the latest-released minor
    version whose version number is lower than ``version`` (inclusive) and
    ends at ``version``'s own release date (exclusive).

    Yields nothing when there is no prior version or when
    ``settings.HYPERKITTY_DATABASE_NAME`` is not set.
    """
    prior_version = (
        Version.objects.minor_versions()
        .filter(version_array__lt=version.cleaned_version_parts_int)
        .order_by("-release_date")
        .first()
    )
    # NOTE(review): the guard checks HYPERKITTY_DATABASE_NAME while the
    # connection uses HYPERKITTY_DATABASE_URL - confirm both are set together.
    if not prior_version or not settings.HYPERKITTY_DATABASE_NAME:
        # Bare return: this is a generator, so a returned value would be
        # discarded and callers simply see an empty iteration.
        return

    conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL)
    try:
        # A named cursor lets rows be fetched as they are iterated.
        with conn.cursor(name="fetch-mail-content") as cursor:
            cursor.execute(
                """
                SELECT content FROM hyperkitty_email
                WHERE date >= %(start)s AND date < %(end)s;
                """,
                {"start": prior_version.release_date, "end": version.release_date},
            )
            for [content] in cursor:
                yield content
    finally:
        # The cursor's ``with`` block only closes the cursor; close the
        # connection explicitly so it is not leaked, including when the
        # consumer stops iterating early.
        conn.close()

View File

@@ -0,0 +1,99 @@
# Generated by Django 4.2.18 on 2025-02-26 19:02
import django.contrib.postgres.fields.ranges
from django.db import migrations, models
import django.db.models.deletion
import django_extensions.db.fields
class Migration(migrations.Migration):
    # Initial migration for the reports app: creates the WebsiteStatReport and
    # WebsiteStatItem tables and the uniqueness constraint on stat items.

    initial = True

    # WebsiteStatReport has a one-to-one link to versions.Version.
    dependencies = [
        ("versions", "0018_version_financial_committee_members"),
    ]

    operations = [
        # One report per version, covering a date range.
        migrations.CreateModel(
            name="WebsiteStatReport",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True, verbose_name="modified"
                    ),
                ),
                ("period", django.contrib.postgres.fields.ranges.DateRangeField()),
                (
                    "version",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="versions.version",
                    ),
                ),
            ],
            options={
                "get_latest_by": "modified",
                "abstract": False,
            },
        ),
        # Individual named metrics; each row belongs to exactly one report.
        migrations.CreateModel(
            name="WebsiteStatItem",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True, verbose_name="modified"
                    ),
                ),
                ("name", models.CharField()),
                ("code_name", models.CharField()),
                ("value", models.FloatField()),
                (
                    "report",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="stats",
                        to="reports.websitestatreport",
                    ),
                ),
            ],
        ),
        # A report may hold at most one stat item per code_name.
        migrations.AddConstraint(
            model_name="websitestatitem",
            constraint=models.UniqueConstraint(
                fields=("report", "code_name"), name="unique_report_code_name"
            ),
        ),
    ]

View File

96
reports/models.py Normal file
View File

@@ -0,0 +1,96 @@
from datetime import timedelta
import requests
from django.contrib.postgres.fields import DateRangeField
from django.db import models
from django.db.backends.postgresql.psycopg_any import DateRange
from django_extensions.db.models import TimeStampedModel
from reports.constants import WEB_ANALYTICS_API_URL
from versions.models import Version
# Range bounds specifier: both the lower and the upper date are included.
INCLUSIVE = "[]"


class WebsiteStatReport(TimeStampedModel):
    """Website traffic statistics for the period leading up to a version.

    The individual metrics are stored as ``WebsiteStatItem`` rows, reachable
    through the ``stats`` reverse relation.
    """

    version = models.OneToOneField(Version, on_delete=models.CASCADE)
    period = DateRangeField()

    def __str__(self):
        return f"Stat report for {self.version}"

    def save(self, **kwargs):
        """Allow creation of reports while omitting period and/or version.

        A missing version defaults to ``Version.objects.most_recent()``. A
        missing period defaults to the day after the previous non-beta
        release through this version's release date, both inclusive.

        Raises:
            ValueError: if the period must be inferred but no earlier
                non-beta version exists.
        """
        if self.version_id is None:
            self.version = Version.objects.most_recent()

        if not self.period:
            previous_version = (
                Version.objects.filter(
                    beta=False, release_date__lt=self.version.release_date
                )
                .order_by("-release_date")
                .first()
            )
            if previous_version is None:
                # Fail with an actionable message rather than an opaque
                # AttributeError on ``None.release_date``.
                raise ValueError(
                    f"Cannot infer a reporting period for {self.version}: "
                    "no earlier non-beta version found"
                )
            start_date = previous_version.release_date + timedelta(days=1)
            self.period = DateRange(start_date, self.version.release_date, INCLUSIVE)

        super().save(**kwargs)

    @property
    def analytics_api_url(self) -> str:
        """Analytics API URL covering this report's period, inclusive of both ends.

        PostgreSQL returns ``daterange`` values in canonical ``[)`` form, so a
        report loaded from the database has an exclusive upper bound one day
        after the intended end date. The bounds are converted back to
        inclusive dates here so the URL is the same whether the instance was
        just created in memory or reloaded.
        """
        one_day = timedelta(days=1)
        start, end = self.period.lower, self.period.upper
        if start is not None and not self.period.lower_inc:
            start += one_day
        if end is not None and not self.period.upper_inc:
            end -= one_day
        return WEB_ANALYTICS_API_URL.format(start, end)

    def populate_from_api(self):
        """Fetch stats from API and generate child WebsiteStatItem instances.

        Existing stat items for this report are replaced. The new items are
        built before anything is deleted, and the delete and insert run in
        one transaction, so a malformed payload or a failed insert leaves the
        previous stats untouched.

        Raises:
            ValueError: if the response body is empty or lacks ``top_stats``.
        """
        # Local import: keeps this block self-contained with respect to the
        # module's existing import list.
        from django.db import transaction

        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(self.analytics_api_url, timeout=30)
        data = response.json()

        if not data or "top_stats" not in data:
            raise ValueError(f"Invalid Plausible API response: {data}")

        stat_items = [
            WebsiteStatItem(
                report=self,
                name=stat_data["name"],
                value=stat_data["value"],
                code_name=stat_data["graph_metric"],
            )
            for stat_data in data["top_stats"]
        ]

        with transaction.atomic():
            # Clear existing stat items, then insert the fresh set.
            WebsiteStatItem.objects.filter(report=self).delete()
            WebsiteStatItem.objects.bulk_create(stat_items)
class WebsiteStatItem(TimeStampedModel):
    """A single named metric (e.g. unique visitors) belonging to a report."""

    report = models.ForeignKey(
        WebsiteStatReport, on_delete=models.CASCADE, related_name="stats"
    )
    name = models.CharField()
    code_name = models.CharField()
    value = models.FloatField()

    class Meta:
        # A report holds at most one item per metric code.
        constraints = [
            models.UniqueConstraint(
                fields=["report", "code_name"], name="unique_report_code_name"
            )
        ]

    def __str__(self):
        return f"{self.report.version} {self.name}"

    @property
    def formatted_value(self) -> str:
        """Render the value for display according to the metric's code name.

        ``bounce_rate`` gets a percent sign, ``visit_duration`` is treated as
        seconds and shown as minutes and seconds, and every other metric is
        the plain string form of the value.
        """
        metric = self.code_name
        if metric == "bounce_rate":
            return f"{self.value}%"
        if metric != "visit_duration":
            return str(self.value)
        mins, secs = divmod(int(self.value), 60)
        return f"{mins}m {secs}s"

View File

@@ -0,0 +1,16 @@
{% extends "base.html" %}
{# Page that renders the web statistics import form and posts it back to the same URL. #}
{% block content %}
  <main class="content">
    <div class="py-3 px-3 md:mt-3 md:px-0">
      <h3 class="mb-4">Import Web Statistics</h3>
      <form method="post">
        {% csrf_token %}
        {{ form }}
        <div class="flex flex-row my-4">
          <input class="py-2 px-3 text-sm text-white rounded bg-orange cursor-pointer" type="submit" value="Import">
        </div>
      </form>
    </div>
  </main>
{% endblock %}

1
reports/tests.py Normal file
View File

@@ -0,0 +1 @@
# Create your tests here.

1
reports/views.py Normal file
View File

@@ -0,0 +1 @@
# Create your views here.