mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Website traffic stats gathering (#1679)
This commit is contained in:
0
reports/__init__.py
Normal file
0
reports/__init__.py
Normal file
32
reports/admin.py
Normal file
32
reports/admin.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from django.contrib import admin
|
||||
|
||||
from reports.models import WebsiteStatReport, WebsiteStatItem
|
||||
|
||||
|
||||
class StatInline(admin.StackedInline):
    """Read-only stacked inline that lists the stat items of a report."""

    model = WebsiteStatItem
    # Only the numeric value is shown, and it cannot be edited here.
    fields = ("value",)
    readonly_fields = ("value",)
    # No blank "add another" forms and no delete checkbox.
    extra = 0
    can_delete = False
|
||||
|
||||
|
||||
@admin.register(WebsiteStatReport)
class WebsiteStatReportAdmin(admin.ModelAdmin):
    """Admin for website stat reports, with headline stats in the changelist."""

    inlines = (StatInline,)
    list_display = ("version", "pageviews", "unique_visitors", "period")
    ordering = ("-version",)

    def get_queryset(self, request):
        """Prefetch stat items so the stat columns need no per-row queries."""
        return super().get_queryset(request).prefetch_related("stats")

    @staticmethod
    def _stat_display(obj, code_name):
        """Return the stat with ``code_name`` as a thousands-separated integer.

        Returns ``None`` when the report has no such stat (e.g. it has not
        been populated yet); the admin then renders its empty-value
        placeholder instead of failing the whole changelist.
        """
        # Iterate ``.all()`` rather than calling ``.get()``/``.filter()`` so
        # that the prefetched rows are used instead of issuing a new query.
        for stat in obj.stats.all():
            if stat.code_name == code_name:
                return f"{int(stat.value):,}"
        return None

    def pageviews(self, obj):
        """Changelist column: total pageviews for the report period."""
        return self._stat_display(obj, "pageviews")

    def unique_visitors(self, obj):
        """Changelist column: unique visitors for the report period."""
        return self._stat_display(obj, "visitors")
|
||||
|
||||
|
||||
@admin.register(WebsiteStatItem)
class WebsiteStatItemAdmin(admin.ModelAdmin):
    """Stock ModelAdmin for individual stat items; nothing is customised."""
|
||||
6
reports/apps.py
Normal file
6
reports/apps.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class ReportsConfig(AppConfig):
    """Django application configuration for the ``reports`` app."""

    name = "reports"
    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
|
||||
6
reports/constants.py
Normal file
6
reports/constants.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# File name of the font used to render the wordcloud.
WORDCLOUD_FONT = "notosans_mono.woff"

# Site whose traffic stats are requested from the analytics service.
WEB_ANALYTICS_DOMAIN = "preview.boost.org"

# ``str.format`` template taking two positional date arguments (from, to);
# each is rendered as YYYY-MM-DD.
WEB_ANALYTICS_API_URL = (
    "https://plausible.io/api/stats/" + WEB_ANALYTICS_DOMAIN + "/top-stats/"
    "?period=custom&from={:%Y-%m-%d}&to={:%Y-%m-%d}"
)
|
||||
10
reports/forms.py
Normal file
10
reports/forms.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from django import forms
|
||||
|
||||
from versions.models import Version
|
||||
|
||||
|
||||
class ImportWebAnalyticsForm(forms.Form):
    """Form with a single dropdown for choosing a ``Version``."""

    # NOTE(review): the queryset expression runs once, when this module is
    # imported -- confirm ``get_dropdown_versions()`` is lazy and does not hit
    # the database at import time.
    version = forms.ModelChoiceField(
        queryset=Version.objects.get_dropdown_versions(),
        widget=forms.Select(attrs={"class": "dropdown !mb-0 h-[38px]"}),
    )
|
||||
112
reports/generation.py
Normal file
112
reports/generation.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import base64
|
||||
import io
|
||||
import random
|
||||
|
||||
import psycopg2
|
||||
from django.conf import settings
|
||||
from matplotlib import pyplot as plt
|
||||
from wordcloud import WordCloud, STOPWORDS
|
||||
|
||||
from core.models import SiteSettings
|
||||
from libraries.models import WordcloudMergeWord # TODO: move model to this app
|
||||
from reports.constants import WORDCLOUD_FONT
|
||||
from versions.models import Version
|
||||
|
||||
|
||||
def generate_wordcloud(version: Version) -> tuple[str | None, list]:
    """Generate a wordcloud PNG for ``version`` and return it base64-encoded.

    Word counts are accumulated case-insensitively over every message yielded
    by ``get_mail_content(version)``, merged according to the
    ``WordcloudMergeWord`` table, and rendered in shades of grey on a
    transparent background.

    Returns:
        Tuple of (base64_encoded_png_string, wordcloud_top_words), where
        ``wordcloud_top_words`` holds the 200 most frequent words sorted
        alphabetically. When no words are found the result is ``(None, [])``.
    """
    wc = WordCloud(
        mode="RGBA",
        background_color=None,
        width=1400,
        height=700,
        stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set,
        font_path=settings.STATIC_ROOT / "font" / WORDCLOUD_FONT,
    )
    word_frequencies = {}
    for content in get_mail_content(version):
        for key, val in wc.process_text(content).items():
            # Single-character tokens are dropped.
            if len(key) < 2:
                continue
            key_lower = key.lower()
            word_frequencies[key_lower] = word_frequencies.get(key_lower, 0) + val
    if not word_frequencies:
        return None, []

    word_frequencies = boost_normalize_words(
        word_frequencies,
        {x.from_word: x.to_word for x in WordcloudMergeWord.objects.all()},
    )
    # first sort by number, then sort the top 200 alphabetically
    word_frequencies = dict(
        sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True)
    )
    wordcloud_top_words = sorted(list(word_frequencies)[:200])

    wc.generate_from_frequencies(word_frequencies)
    figure = plt.figure(figsize=(14, 7), facecolor=None)
    try:
        plt.imshow(
            wc.recolor(color_func=grey_color_func, random_state=3),
            interpolation="bilinear",
        )
        plt.axis("off")
        image_bytes = io.BytesIO()
        plt.savefig(
            image_bytes,
            format="png",
            dpi=100,
            bbox_inches="tight",
            pad_inches=0,
            transparent=True,
        )
    finally:
        # pyplot keeps a reference to every figure until it is closed; without
        # this each call would leak a figure in a long-running process.
        plt.close(figure)
    return base64.b64encode(image_bytes.getvalue()).decode(), wordcloud_top_words
|
||||
|
||||
|
||||
def boost_normalize_words(frequencies, word_map):
    """Fold the counts of aliased words into their replacement words.

    Args:
        frequencies: Dict of word -> count. It is modified in place.
        word_map: Dict of from_word -> to_word.

    Returns:
        The same ``frequencies`` dict, with each ``from_word`` that had a
        non-zero count removed and its count added to ``to_word``.
    """
    for from_word, to_word in word_map.items():
        # A word mapped to itself needs no merging. Processing it anyway would
        # double its count and then delete the entry altogether.
        if from_word == to_word:
            continue
        from_count = frequencies.get(from_word, 0)
        if not from_count:
            continue
        frequencies[to_word] = frequencies.get(to_word, 0) + from_count
        del frequencies[from_word]
    return frequencies
|
||||
|
||||
|
||||
def grey_color_func(*args, **kwargs):
    """Return a random grey as an ``hsl(0, 0%, L%)`` string, 10 <= L <= 80.

    Intended as a wordcloud ``color_func``. When the caller supplies a
    ``random_state`` keyword (a ``random.Random`` instance) the lightness is
    drawn from it, so a seeded recolor is reproducible. Otherwise the global
    ``random`` module is used. All other arguments are ignored.
    """
    rng = kwargs.get("random_state")
    if rng is None:
        rng = random
    return "hsl(0, 0%%, %d%%)" % rng.randint(10, 80)
|
||||
|
||||
|
||||
def get_mail_content(version: Version):
    """Yield the content of each email sent during the run-up to ``version``.

    The window starts at the release date of the prior version (inclusive) and
    ends at the release date of ``version`` (exclusive). The prior version is
    the most recently released minor version with a lower version number.

    Nothing is yielded when there is no prior version or when
    ``settings.HYPERKITTY_DATABASE_NAME`` is not set.
    """
    prior_version = (
        Version.objects.minor_versions()
        .filter(version_array__lt=version.cleaned_version_parts_int)
        .order_by("-release_date")
        .first()
    )
    if not prior_version or not settings.HYPERKITTY_DATABASE_NAME:
        return
    conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL)
    try:
        # A named cursor is server-side, so rows are streamed to the caller
        # instead of being loaded into memory all at once.
        with conn.cursor(name="fetch-mail-content") as cursor:
            cursor.execute(
                """
                    SELECT content FROM hyperkitty_email
                    WHERE date >= %(start)s AND date < %(end)s;
                """,
                {"start": prior_version.release_date, "end": version.release_date},
            )
            for [content] in cursor:
                yield content
    finally:
        # Leaving the cursor's ``with`` block only closes the cursor; the
        # connection has to be closed explicitly or it leaks on every call.
        conn.close()
|
||||
99
reports/migrations/0001_initial.py
Normal file
99
reports/migrations/0001_initial.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# Generated by Django 4.2.18 on 2025-02-26 19:02
|
||||
|
||||
import django.contrib.postgres.fields.ranges
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django_extensions.db.fields
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema for the reports app.

    Creates ``WebsiteStatReport`` and ``WebsiteStatItem`` and makes
    (report, code_name) unique across stat items.
    """

    initial = True

    dependencies = [
        ("versions", "0018_version_financial_committee_members"),
    ]

    operations = [
        # One report per version (one-to-one), covering a date range.
        migrations.CreateModel(
            name="WebsiteStatReport",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True, verbose_name="modified"
                    ),
                ),
                ("period", django.contrib.postgres.fields.ranges.DateRangeField()),
                (
                    "version",
                    models.OneToOneField(
                        on_delete=models.CASCADE,
                        to="versions.version",
                    ),
                ),
            ],
            options={
                "get_latest_by": "modified",
                "abstract": False,
            },
        ),
        # Individual named values that belong to a report.
        migrations.CreateModel(
            name="WebsiteStatItem",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True, verbose_name="created"
                    ),
                ),
                (
                    "modified",
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True, verbose_name="modified"
                    ),
                ),
                ("name", models.CharField()),
                ("code_name", models.CharField()),
                ("value", models.FloatField()),
                (
                    "report",
                    models.ForeignKey(
                        on_delete=models.CASCADE,
                        related_name="stats",
                        to="reports.websitestatreport",
                    ),
                ),
            ],
        ),
        # A report can hold at most one item per code_name.
        migrations.AddConstraint(
            model_name="websitestatitem",
            constraint=models.UniqueConstraint(
                fields=("report", "code_name"), name="unique_report_code_name"
            ),
        ),
    ]
|
||||
0
reports/migrations/__init__.py
Normal file
0
reports/migrations/__init__.py
Normal file
96
reports/models.py
Normal file
96
reports/models.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import requests
|
||||
from django.contrib.postgres.fields import DateRangeField
|
||||
from django.db import models
|
||||
from django.db.backends.postgresql.psycopg_any import DateRange
|
||||
from django_extensions.db.models import TimeStampedModel
|
||||
|
||||
from reports.constants import WEB_ANALYTICS_API_URL
|
||||
from versions.models import Version
|
||||
|
||||
# Range bounds flag: both the lower and the upper date belong to the range.
INCLUSIVE = "[]"

# Seconds to wait for the analytics API before giving up.
ANALYTICS_API_TIMEOUT = 30


class WebsiteStatReport(TimeStampedModel):
    """Website traffic stats gathered for one ``Version`` over a date range.

    The individual numbers are stored as ``WebsiteStatItem`` rows, reachable
    through the ``stats`` related name.
    """

    version = models.OneToOneField(Version, on_delete=models.CASCADE)
    period = DateRangeField()

    def __str__(self):
        return f"Stat report for {self.version}"

    def save(self, *args, **kwargs):
        """Allow creation of reports while omitting period and/or version.

        A missing version defaults to ``Version.objects.most_recent()``. A
        missing period defaults to the day after the previous non-beta release
        through this version's release date, both inclusive.

        Raises:
            ValueError: If the period has to be derived but there is no
                version, no release date, or no earlier non-beta release.
        """
        if self.version_id is None:
            self.version = Version.objects.most_recent()
        if not self.period:
            self.period = self._default_period()
        super().save(*args, **kwargs)

    def _default_period(self):
        """Build the default inclusive date range for this report's version."""
        if self.version_id is None:
            raise ValueError(
                "Cannot derive the report period: no version is available."
            )
        release_date = self.version.release_date
        if release_date is None:
            raise ValueError(
                f"Cannot derive the report period: {self.version} has no "
                "release date."
            )
        previous_version = (
            Version.objects.filter(beta=False, release_date__lt=release_date)
            .order_by("-release_date")
            .first()
        )
        if previous_version is None:
            raise ValueError(
                f"Cannot derive the report period: no non-beta release "
                f"precedes {self.version}."
            )
        start_date = previous_version.release_date + timedelta(days=1)
        return DateRange(start_date, release_date, INCLUSIVE)

    def _inclusive_period_bounds(self):
        """Return ``(first_day, last_day)`` of ``period``, both inclusive.

        PostgreSQL hands date ranges back in canonical ``[)`` form whatever
        bounds they were saved with, so a report loaded from the database has
        an exclusive upper bound one day past the intended last day. The
        bounds flags are honoured here so that a freshly built ``[]`` range
        and its reloaded ``[)`` twin give the same dates.
        """
        period = self.period
        first_day, last_day = period.lower, period.upper
        if first_day is not None and not period.lower_inc:
            first_day += timedelta(days=1)
        if last_day is not None and not period.upper_inc:
            last_day -= timedelta(days=1)
        return first_day, last_day

    @property
    def analytics_api_url(self) -> str:
        """URL of the analytics top-stats endpoint for this report's period."""
        first_day, last_day = self._inclusive_period_bounds()
        return WEB_ANALYTICS_API_URL.format(first_day, last_day)

    def populate_from_api(self):
        """Fetch stats from API and generate child WebsiteStatItem instances.

        Existing stat items of this report are replaced. The delete and the
        insert run in a single transaction, so a failed insert leaves the
        previous items in place.

        Raises:
            ValueError: If the response body is not JSON or carries no
                ``top_stats`` entry.
            requests.RequestException: On network failure or timeout.
        """
        from django.db import transaction

        response = requests.get(
            self.analytics_api_url, timeout=ANALYTICS_API_TIMEOUT
        )
        data = response.json()

        if not data or "top_stats" not in data:
            raise ValueError(f"Invalid Plausible API response: {data}")

        # Build every item before touching the database, so that a malformed
        # entry cannot wipe the stats that are already stored.
        stat_items = [
            WebsiteStatItem(
                report=self,
                name=stat_data["name"],
                value=stat_data["value"],
                code_name=stat_data["graph_metric"],
            )
            for stat_data in data["top_stats"]
        ]

        with transaction.atomic():
            # Clear existing stat items
            WebsiteStatItem.objects.filter(report=self).delete()
            WebsiteStatItem.objects.bulk_create(stat_items)
|
||||
|
||||
|
||||
class WebsiteStatItem(TimeStampedModel):
    """Individual stat item (e.g. unique visitors)"""

    report = models.ForeignKey(
        WebsiteStatReport, on_delete=models.CASCADE, related_name="stats"
    )
    # Display label of the stat.
    name = models.CharField()
    # Identifier of the stat; unique within a report (see Meta).
    code_name = models.CharField()
    value = models.FloatField()

    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=["report", "code_name"], name="unique_report_code_name"
            )
        ]

    def __str__(self):
        return f"{self.report.version} {self.name}"

    @property
    def formatted_value(self) -> str:
        """Render ``value`` for display according to ``code_name``.

        ``bounce_rate`` gets a trailing percent sign, ``visit_duration`` is
        treated as whole seconds and shown as minutes and seconds, and
        anything else is the plain string form of the value.
        """
        if self.code_name == "bounce_rate":
            return f"{self.value}%"
        if self.code_name != "visit_duration":
            return str(self.value)
        minutes, seconds = divmod(int(self.value), 60)
        return f"{minutes}m {seconds}s"
|
||||
16
reports/templates/reports/import_web_analytics.html
Normal file
16
reports/templates/reports/import_web_analytics.html
Normal file
@@ -0,0 +1,16 @@
|
||||
{% extends "base.html" %}

{# Renders the context's `form` inside a POST form with a CSRF token and an "Import" submit button. #}
{% block content %}
  <main class="content">
    <div class="py-3 px-3 md:mt-3 md:px-0">
      <h3 class="mb-4">Import Web Statistics</h3>
      <form method="post">
        {% csrf_token %}
        {{ form }}
        <div class="flex flex-row my-4">
          <input class="py-2 px-3 text-sm text-white rounded bg-orange cursor-pointer" type="submit" value="Import">
        </div>
      </form>
    </div>
  </main>
{% endblock %}
|
||||
1
reports/tests.py
Normal file
1
reports/tests.py
Normal file
@@ -0,0 +1 @@
|
||||
# Create your tests here.
|
||||
1
reports/views.py
Normal file
1
reports/views.py
Normal file
@@ -0,0 +1 @@
|
||||
# Create your views here.
|
||||
Reference in New Issue
Block a user