diff --git a/docs/commands.md b/docs/commands.md
index fddff569..f1068737 100644
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -320,3 +320,19 @@ For this to work `SLACK_BOT_API` must be set in the `.env` file.
| Options | Format | Description |
|----------------------|--------|--------------------------------------------------------------|
| `--user_id` | int | If passed, the user with this ID will receive email notifications when this task is started and finished, or if the task raises and exception. |
+
+
+## `import_ml_counts`
+
+**Purpose**: Import mailing list counts from the mailman archives.
+
+```bash
+./manage.py import_ml_counts
+```
+
+**Options**
+
+| Options | Format | Description |
+|----------------|--------|----------------------------------------------------------------------------------------------------------------------|
+| `--start_date` | date | If passed, retrieves data from the start date supplied, d-m-y, default 20-11-1998 (the start of the data in mailman) |
+| `--end_date` | date | If passed, retrieves data until the end date supplied, d-m-y, default today |
diff --git a/docs/development_setup_notes.md b/docs/development_setup_notes.md
index c0281a3b..308481cf 100644
--- a/docs/development_setup_notes.md
+++ b/docs/development_setup_notes.md
@@ -313,9 +313,9 @@ https://docs.allauth.org/en/latest/socialaccount/providers/google.html
1. `TF_VAR_google_cloud_email` (the email address of your Google Cloud account)
2. `TF_VAR_google_organization_domain` (usually the domain of your Google Cloud account, e.g. "boost.org" if you will be using an @boost.org email address)
3. `TF_VAR_google_cloud_project_name` (optional, default: localboostdev) - needs to change if destroyed and a setup is needed within 30 days
-2. Run `make development-tofu-init` to initialize tofu.
-3. Run `make development-tofu-plan` to confirm the planned changes.
-4. Run `make development-tofu-apply` to apply the changes.
+2. Run `just development-tofu-init` to initialize tofu.
+3. Run `just development-tofu-plan` to confirm the planned changes.
+4. Run `just development-tofu-apply` to apply the changes.
5. Go to https://console.developers.google.com/
1. Search for the newly created project, named "Boost Development" (ID: localboostdev by default).
2. Type "credentials" in the search input at the top of the page.
@@ -352,6 +352,7 @@ In your env:
#### Set Up Pycharm
You can set up your IDE with a new "Python Debug Server" configuration as:
+
#### Common Usage
diff --git a/docs/first_time_data_import.md b/docs/first_time_data_import.md
index 259908dd..f5cc8125 100644
--- a/docs/first_time_data_import.md
+++ b/docs/first_time_data_import.md
@@ -38,6 +38,7 @@ The `boost_setup` command will run all of the processes listed here:
# Get the most recent beta release, and delete old beta releases
./manage.py import_beta_release --delete-versions
+./manage.py import_ml_counts
```
Read more aboout these [management commands](./commands.md).
diff --git a/docs/release_reports.md b/docs/release_reports.md
index 0a29b7fd..cb089bb4 100644
--- a/docs/release_reports.md
+++ b/docs/release_reports.md
@@ -1,5 +1,17 @@
# Release Reports
+## Prerequisites
+
+1. You should upload updated subscriber data.
+ 1. Ask Sam for a copy of the "subscribe" data.
+ 2. In the Django admin interface go to "Subscription datas" under "MAILING_LIST".
+ 3. At the top of the page click on the "IMPORT 'SUBSCRIBE' DATA" button.
+2. To update the mailing list counts, if you haven't already run the "DO IT ALL" button:
+ 1. Go to "Versions" under "VERSIONS" in the admin interface
+ 2. At the top of the page click on the "DO IT ALL" button.
+
+## Report Creation
+
1. Go to /admin
2. Go to the "Libraries" section
3. In the top menu click on "GET RELEASE REPORT".
diff --git a/libraries/forms.py b/libraries/forms.py
index 348109dd..ae189173 100644
--- a/libraries/forms.py
+++ b/libraries/forms.py
@@ -10,7 +10,11 @@ from django.db.models import F, Q, Count, OuterRef, Sum, When, Value, Case
from django.forms import Form, ModelChoiceField, ModelForm, BooleanField
from core.models import RenderedContent
-from reports.generation import generate_wordcloud
+from reports.generation import (
+ generate_wordcloud,
+ get_mailing_list_post_stats,
+ get_new_subscribers_stats,
+)
from slack.models import Channel, SlackActivityBucket, SlackUser
from versions.models import Version
from .models import (
@@ -772,6 +776,12 @@ class CreateReportForm(CreateReportFullForm):
Channel.objects.filter(name__istartswith="boost").order_by("name"), 10
)
committee_members = version.financial_committee_members.all()
+ mailinglist_post_stats = get_mailing_list_post_stats(
+ prior_version.release_date, version.release_date
+ )
+ new_subscribers_stats = get_new_subscribers_stats(
+ prior_version.release_date, version.release_date
+ )
library_index_library_data = []
for library in self._get_libraries_by_quality():
library_index_library_data.append(
@@ -804,6 +814,8 @@ class CreateReportForm(CreateReportFullForm):
"mailinglist_total": total_mailinglist_count or 0,
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
+ "mailinglist_post_stats": mailinglist_post_stats,
+ "mailinglist_new_subscribers_stats": new_subscribers_stats,
"commit_contributors_release_count": commit_contributors_release_count,
"commit_contributors_new_count": commit_contributors_new_count,
"global_contributors_new_count": len(
diff --git a/libraries/management/commands/release_tasks.py b/libraries/management/commands/release_tasks.py
index 986105d8..75504742 100644
--- a/libraries/management/commands/release_tasks.py
+++ b/libraries/management/commands/release_tasks.py
@@ -82,6 +82,7 @@ class ReleaseTasksManager:
ReleaseTask("Updating github issues", ["update_issues"]),
ReleaseTask("Updating slack activity buckets", ["fetch_slack_activity"]),
ReleaseTask("Updating website statistics", self.update_website_statistics),
+ ReleaseTask("Importing mailing list counts", ["import_ml_counts"]),
ReleaseTask("Generating report", self.generate_report),
]
diff --git a/mailing_list/admin.py b/mailing_list/admin.py
index 5ac81a5e..778b9d4a 100644
--- a/mailing_list/admin.py
+++ b/mailing_list/admin.py
@@ -1,11 +1,21 @@
+import csv
+import logging
+import re
+from datetime import datetime
+from io import TextIOWrapper
+
+from django import forms
+from django.shortcuts import redirect, render
from django.urls import path
from django.http import HttpResponseRedirect
from django.contrib import admin, messages
from django.conf import settings
-from mailing_list.models import EmailData
+from mailing_list.models import EmailData, SubscriptionData
from mailing_list.tasks import sync_mailinglist_stats
+logger = logging.getLogger(__name__)
+
@admin.register(EmailData)
class EmailDataAdmin(admin.ModelAdmin):
@@ -43,3 +53,62 @@ class EmailDataAdmin(admin.ModelAdmin):
def has_add_permission(self, request):
return False
+
+
+class SubscribesCSVForm(forms.Form):
+ # Single upload field. SubscriptionDataAdmin.import_csv renders this form on
+ # GET and reads the posted file from request.FILES["csv_file"] on POST.
+ csv_file = forms.FileField()
+
+
+@admin.register(SubscriptionData)
+class SubscriptionDataAdmin(admin.ModelAdmin):
+    """Admin for ``SubscriptionData`` with an extra "import subscribe file" view.
+
+    The import view accepts a space-delimited log upload and stores one
+    ``SubscriptionData`` row for every ``new`` entry it can parse.
+    """
+
+    list_display = ["subscription_dt", "email"]
+    search_fields = ["email"]
+    change_list_template = "admin/mailinglist_change_list.html"
+
+    # Raw string so ``\.`` reaches the regex engine as an escaped dot instead
+    # of being read as an (invalid) string escape sequence.
+    email_regex = re.compile(r"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})")
+
+    def get_urls(self):
+        """Return the admin URLs with the import route placed first."""
+        return [
+            path("import-csv", self.import_csv, name="import_csv")
+        ] + super().get_urls()
+
+    def parse_rows(self, reader):
+        """Yield an unsaved ``SubscriptionData`` for each usable ``new`` row.
+
+        Columns 0-3 are joined into the timestamp, column 5 is the list name
+        and column 6 the entry type; the email address is searched for in
+        column 6 onwards because its position varies.
+
+        Rows are skipped when they are too short to carry an entry type, are
+        not ``new`` entries, have a timestamp that cannot be parsed, or contain
+        no email address. The last two cases are logged as errors.
+        """
+        for row in reader:
+            # Blank or truncated lines have no entry-type column at all.
+            if len(row) < 7:
+                continue
+            entry_type = row[6]
+            # only save confirmed subscriber entries, it's all we need for now
+            if entry_type != "new":
+                continue
+            date_str = " ".join(row[0:4])
+            try:
+                dt = datetime.strptime(date_str, "%b %d %H:%M:%S %Y")
+            except ValueError:
+                # subscription_dt is a required column, so a row without a
+                # usable timestamp cannot be stored; skip it rather than let
+                # it abort the whole bulk insert.
+                logger.error(f"Error parsing date {date_str} from {row=}")
+                continue
+            # re-merge, the email address isn't always in a consistent position
+            email_matches = re.search(self.email_regex, " ".join(row[6:]))
+            if not email_matches:
+                logger.error(
+                    f"Invalid email {row=} {email_matches=} {' '.join(row[6:])=}"
+                )
+                continue
+            yield SubscriptionData(
+                email=email_matches.group(0),
+                entry_type=entry_type,
+                # NOTE(review): rstrip() removes any trailing run of the
+                # characters ":", "-" and "1", not the literal suffix ":-1";
+                # confirm that is what the log format needs.
+                list=row[5].rstrip(":-1"),
+                subscription_dt=dt,
+            )
+
+    def import_csv(self, request):
+        """Show the upload form, or import the posted file and redirect.
+
+        An invalid POST (for example one without a file) re-renders the bound
+        form instead of raising on the missing upload.
+        """
+        form = SubscribesCSVForm()
+        if request.method == "POST":
+            form = SubscribesCSVForm(request.POST, request.FILES)
+            if form.is_valid():
+                csv_file = form.cleaned_data["csv_file"]
+                rows = TextIOWrapper(csv_file, encoding="ISO-8859-1", newline="")
+                reader = csv.reader(rows, delimiter=" ")
+                # ignore_conflicts lets a re-upload skip rows already stored.
+                SubscriptionData.objects.bulk_create(
+                    self.parse_rows(reader), batch_size=500, ignore_conflicts=True
+                )
+                self.message_user(request, "Subscribe CSV file imported.")
+                return redirect("..")
+
+        payload = {"form": form}
+        return render(request, "admin/mailinglist_subscribe_csv_form.html", payload)
diff --git a/mailing_list/constants.py b/mailing_list/constants.py
new file mode 100644
index 00000000..7ca5ad7a
--- /dev/null
+++ b/mailing_list/constants.py
@@ -0,0 +1,41 @@
+# we only want boost devel for now, leaving the others in case that changes.
+# Each entry is a str.format template filled with (year, month), zero-padded
+# to four and two digits respectively.
+ML_STATS_URLS = [
+ "https://lists.boost.org/Archives/boost/{:04}/{:02}/author.php",
+ # "https://lists.boost.org/boost-users/{:04}/{:02}/author.php",
+ # "https://lists.boost.org/boost-announce/{:04}/{:02}/author.php",
+]
+# One to three integers separated by "-" or "/"; groups 2 and 3 are optional.
+ARG_DATE_REGEX = r"^([0-9]+)(?:$|(?:-|/)([0-9]+)(?:$|(?:-|/)([0-9]+)$))"
+# NOTE(review): this literal is split across two lines as shown here, which
+# is not a valid single-line raw string; it looks like markup was lost from
+# the pattern. Confirm the intended regex against the original file.
+AUTHOR_PATTERN_REGEX = r"
(.*)"
+# Captures a "YYYY-MM-DD HH:MM:SS" timestamp enclosed in parentheses.
+DATE_PATTERN_REGEX = r".*\((\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\)"
+
+# Used by the mailing list page html parser to turn characters back into the
+# single bytes they were decoded from. Keys are Unicode code points and values
+# are byte values in the 128-159 range; the pairs match the Windows-1252 code
+# page (e.g. 8364, the euro sign, maps to byte 128). Code points not listed
+# here are used as-is by the caller.
+LATIN_1_EQUIVS = {
+ 8364: 128,
+ 8218: 130,
+ 402: 131,
+ 8222: 132,
+ 8230: 133,
+ 8224: 134,
+ 8225: 135,
+ 710: 136,
+ 8240: 137,
+ 352: 138,
+ 8249: 139,
+ 338: 140,
+ 381: 142,
+ 8216: 145,
+ 8217: 146,
+ 8220: 147,
+ 8221: 148,
+ 8226: 149,
+ 8211: 150,
+ 8212: 151,
+ 732: 152,
+ 8482: 153,
+ 353: 154,
+ 8250: 155,
+ 339: 156,
+ 382: 158,
+ 376: 159,
+}
diff --git a/mailing_list/management/commands/import_ml_counts.py b/mailing_list/management/commands/import_ml_counts.py
new file mode 100644
index 00000000..6afaaa8f
--- /dev/null
+++ b/mailing_list/management/commands/import_ml_counts.py
@@ -0,0 +1,133 @@
+# Copyright 2024 Dave O'Connor
+# Derived from code by Joaquin M Lopez Munoz.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+import djclick as click
+import logging
+import re
+import warnings
+from datetime import timedelta, datetime
+import html
+
+from dateutil.relativedelta import relativedelta
+from unidecode import unidecode
+
+import requests
+
+from mailing_list.constants import (
+ ML_STATS_URLS,
+ LATIN_1_EQUIVS,
+ ARG_DATE_REGEX,
+ AUTHOR_PATTERN_REGEX,
+ DATE_PATTERN_REGEX,
+)
+from mailing_list.models import PostingData
+
+logger = logging.getLogger(__name__)
+
+arg_date_pattern = re.compile(ARG_DATE_REGEX)
+author_pattern = re.compile(AUTHOR_PATTERN_REGEX)
+date_pattern = re.compile(DATE_PATTERN_REGEX)
+
+
+def decode_broken_html(str):
+    """Unescape HTML entities in ``str`` and re-decode the text as UTF-8.
+
+    Every character of the unescaped text is turned into a byte value (code
+    points listed in ``LATIN_1_EQUIVS`` are swapped for their mapped byte),
+    the bytes are decoded as UTF-8 with undecodable sequences dropped, and the
+    result is passed through ``unidecode``. Warnings are suppressed throughout.
+    """
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        unescaped = html.unescape(str)
+        raw = bytearray(LATIN_1_EQUIVS.get(ord(ch), ord(ch)) for ch in unescaped)
+        return unidecode(raw.decode("utf-8", "ignore"))
+
+
+def parse_start_datetime(date_str):
+    """Parse a start-date argument into the first instant it covers.
+
+    Accepted forms, separated by ``-`` or ``/``:
+
+    * ``d-m-y``: midnight at the start of that day;
+    * ``y-m``: midnight on the first day of that month;
+    * ``y``: midnight on 1 January of that year.
+
+    The partial forms follow the year / year-month reading that
+    ``parse_end_datetime`` applies to the same inputs. Previously they fell
+    back to year 1 and placed the year in the day slot.
+
+    Raises:
+        ValueError: if ``date_str`` does not match the expected pattern or
+            names an impossible date.
+    """
+    m = arg_date_pattern.match(date_str)
+    if not m:
+        raise ValueError("wrong date format")
+    logger.info(f"{m=} {m.group(1)=} {m.group(2)=} {m.group(3)=}")
+    first, second, third = m.groups()
+    if third:
+        # Full d-m-y form.
+        return datetime(int(third), int(second), int(first), 0, 0, 0)
+    # Partial form: the first number is the year, the optional second the month.
+    return datetime(int(first), int(second) if second else 1, 1, 0, 0, 0)
+
+
+def parse_end_datetime(date_str):
+    """Parse an end-date argument into the last second it covers.
+
+    Accepted forms, separated by ``-`` or ``/``:
+
+    * ``d-m-y``: 23:59:59 on that day;
+    * ``y-m``: 23:59:59 on the last day of that month;
+    * ``y``: 23:59:59 on 31 December of that year.
+
+    Raises:
+        ValueError: if ``date_str`` does not match the expected pattern or
+            names an impossible date.
+    """
+    m = arg_date_pattern.match(date_str)
+    if not m:
+        raise ValueError("wrong date format")
+    logger.info(f"{m=} {m.group(1)=} {m.group(2)=} {m.group(3)=}")
+    first, second, third = m.groups()
+    if third:
+        return datetime(int(third), int(second), int(first), 23, 59, 59)
+    if second:
+        # Adding 31 days to the 1st always lands in the following month;
+        # snapping back to its 1st and stepping one day earlier gives the last
+        # day of the requested month. The time is set up front because the old
+        # code wrapped the values in a tuple and called .replace on it.
+        month_start = datetime(int(first), int(second), 1, 23, 59, 59)
+        next_month_start = (month_start + timedelta(days=31)).replace(day=1)
+        return next_month_start - timedelta(days=1)
+    return datetime(int(first), 12, 31, 23, 59, 59)
+
+
+def retrieve_authors_from_ml(url, start_date, end_date):
+    """Fetch one archive page and return unsaved ``PostingData`` for its posts.
+
+    The page is scanned line by line: a line matching ``author_pattern`` sets
+    the current author, and each later line matching ``date_pattern`` becomes
+    a post by that author if its timestamp lies within
+    ``[start_date, end_date]`` (both inclusive).
+
+    Returns an empty list when the page does not exist (404) or the server
+    answers with any other error status.
+
+    Raises:
+        requests.RequestException: on connection failures or when the server
+            does not respond within the timeout.
+    """
+    posts = []
+    logger.info(f"Retrieving data from {url=}.")
+    # Bound the request so a stalled server cannot hang the import forever.
+    r = requests.get(url, timeout=30)
+    if r.status_code == 404:
+        return posts
+    if not r.ok:
+        # An error page is not an archive listing; don't try to parse it.
+        logger.warning(f"Unexpected status {r.status_code} for {url=}, skipping.")
+        return posts
+
+    author = None
+    for line in r.text.splitlines():
+        author_match = author_pattern.match(line)
+        if author_match:
+            # needs multiple passes to work
+            author = decode_broken_html(author_match.group(1))
+            continue
+        date_pattern_match = date_pattern.match(line)
+        if not (author and date_pattern_match):
+            continue
+        post_date = datetime.strptime(
+            date_pattern_match.group(1), "%Y-%m-%d %H:%M:%S"
+        )
+        if start_date <= post_date <= end_date:
+            posts.append(PostingData(name=author, post_time=post_date))
+    return posts
+
+
+def retrieve_authors(start_date, end_date):
+    """Re-import the posts made between ``start_date`` and ``end_date``.
+
+    Every monthly archive page from the start month through the end month is
+    fetched for each URL template in ``ML_STATS_URLS``. The stored
+    ``PostingData`` rows inside the range are then deleted and replaced with
+    the collected posts.
+    """
+    logger.info(f"retrieve_authors from {start_date=} to {end_date=}")
+    month_cursor = datetime(start_date.year, start_date.month, 1)
+    last_month = datetime(end_date.year, end_date.month, 1)
+    collected = []
+    while month_cursor <= last_month:
+        for url_template in ML_STATS_URLS:
+            page_url = url_template.format(month_cursor.year, month_cursor.month)
+            collected.extend(
+                retrieve_authors_from_ml(page_url, start_date, end_date)
+            )
+        month_cursor = month_cursor + relativedelta(months=+1)
+    # Clear the range first so a re-run does not duplicate posts.
+    stale_rows = PostingData.objects.filter(
+        post_time__gte=start_date, post_time__lte=end_date
+    )
+    stale_rows.delete()
+    PostingData.objects.bulk_create(collected)
+
+
+@click.command()
+@click.option("--start_date", is_flag=False, help="Start Date", default=None)
+@click.option("--end_date", is_flag=False, help="End Date", default=None)
+def command(start_date, end_date):
+    # Entry point for `./manage.py import_ml_counts`: resolve the optional
+    # date arguments, then import the posts for that range.
+    logger.info(f"Starting import_ml_counts {start_date=} {end_date=}")
+    if start_date:
+        start_date = parse_start_datetime(start_date)
+    else:
+        # No start given: fall back to the fixed earliest date.
+        start_date = datetime(1998, 11, 11)
+    logger.info(f"{start_date=}")
+    if end_date:
+        end_date = parse_end_datetime(end_date)
+    else:
+        # No end given: import everything up to the current moment.
+        end_date = datetime.now()
+    logger.info(f"{end_date=}")
+    retrieve_authors(start_date, end_date)
diff --git a/mailing_list/migrations/0005_postingdata_subscriptiondata.py b/mailing_list/migrations/0005_postingdata_subscriptiondata.py
new file mode 100644
index 00000000..8f8d55c1
--- /dev/null
+++ b/mailing_list/migrations/0005_postingdata_subscriptiondata.py
@@ -0,0 +1,52 @@
+# Generated by Django 4.2.16 on 2025-03-20 18:02
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Creates the PostingData and SubscriptionData tables for the mailing_list
+ # app.
+
+ # Must run after the app's 0004_initial migration.
+ dependencies = [
+ ("mailing_list", "0004_initial"),
+ ]
+
+ operations = [
+ # PostingData: author name plus post timestamp, with a creation stamp.
+ migrations.CreateModel(
+ name="PostingData",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("name", models.CharField(max_length=255)),
+ ("post_time", models.DateTimeField()),
+ ("created", models.DateTimeField(auto_now_add=True)),
+ ],
+ ),
+ # SubscriptionData: one subscription event per (time, email, list).
+ migrations.CreateModel(
+ name="SubscriptionData",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("subscription_dt", models.DateTimeField()),
+ ("email", models.EmailField(max_length=255)),
+ ("entry_type", models.CharField(max_length=24)),
+ ("list", models.CharField(max_length=24)),
+ ("created", models.DateTimeField(auto_now_add=True)),
+ ],
+ options={
+ # The same (time, email, list) combination cannot be stored twice.
+ "unique_together": {("subscription_dt", "email", "list")},
+ },
+ ),
+ ]
diff --git a/mailing_list/models.py b/mailing_list/models.py
index 5e666540..cd8a1f3a 100644
--- a/mailing_list/models.py
+++ b/mailing_list/models.py
@@ -35,3 +35,25 @@ class EmailData(models.Model):
def __str__(self):
return self.author.name
+
+
+# One mailing list post: the poster's name and the time of the post.
+class PostingData(models.Model):
+ name = models.CharField(max_length=255)
+ post_time = models.DateTimeField()
+
+ # Filled in automatically when the row is first created (auto_now_add).
+ created = models.DateTimeField(auto_now_add=True)
+
+ def __str__(self):
+ # Shown as "<name> <post_time>".
+ return f"{self.name} {self.post_time}"
+
+
+# One mailing list subscription event: when it happened, the address, the
+# kind of entry and the list it belongs to.
+class SubscriptionData(models.Model):
+ subscription_dt = models.DateTimeField()
+ email = models.EmailField(max_length=255)
+ entry_type = models.CharField(max_length=24)
+ # NOTE: this field name shadows the builtin `list` inside the class body.
+ list = models.CharField(max_length=24)
+
+ # Filled in automatically when the row is first created (auto_now_add).
+ created = models.DateTimeField(auto_now_add=True)
+
+ class Meta:
+ # The same (time, email, list) combination cannot be stored twice.
+ unique_together = ["subscription_dt", "email", "list"]
diff --git a/reports/generation.py b/reports/generation.py
index 197cddd5..2b77c8c8 100644
--- a/reports/generation.py
+++ b/reports/generation.py
@@ -1,17 +1,24 @@
import base64
import io
+import logging
import random
+from datetime import datetime
import psycopg2
from django.conf import settings
+from django.db.models import Count
+from django.db.models.functions import ExtractWeek, ExtractIsoYear
from matplotlib import pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from core.models import SiteSettings
from libraries.models import WordcloudMergeWord # TODO: move model to this app
+from mailing_list.models import PostingData, SubscriptionData
from reports.constants import WORDCLOUD_FONT
from versions.models import Version
+logger = logging.getLogger(__name__)
+
def generate_wordcloud(version: Version) -> tuple[str | None, list]:
"""Generates a wordcloud png and returns it as a base64 string and word frequencies.
@@ -25,7 +32,7 @@ def generate_wordcloud(version: Version) -> tuple[str | None, list]:
width=1400,
height=700,
stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set,
- font_path=settings.STATIC_ROOT / "font" / WORDCLOUD_FONT,
+ font_path=f"{settings.STATIC_ROOT}/font/{WORDCLOUD_FONT}",
)
word_frequencies = {}
for content in get_mail_content(version):
@@ -110,3 +117,40 @@ def get_mail_content(version: Version):
)
for [content] in cursor:
yield content
+
+
+def get_mailing_list_post_stats(start_date: datetime, end_date: datetime):
+    """Return weekly post counts as chart points for the given period.
+
+    Posts later than ``start_date`` and up to and including ``end_date`` are
+    grouped by ISO week and ordered by ISO year, then week. Each point is a
+    dict with ``"y"`` (number of posts) and ``"x"`` (week number).
+    """
+    logger.info(f"from {start_date} to {end_date}")
+    in_period = PostingData.objects.filter(
+        post_time__gt=start_date, post_time__lte=end_date
+    )
+    with_week = in_period.annotate(
+        week=ExtractWeek("post_time"), iso_year=ExtractIsoYear("post_time")
+    )
+    weekly = (
+        with_week.values("week")
+        .annotate(count=Count("id"))
+        .order_by("iso_year", "week")
+    )
+    points = []
+    for row in weekly:
+        points.append({"y": row.get("count"), "x": row.get("week")})
+    return points
+
+
+def get_new_subscribers_stats(start_date: datetime, end_date: datetime):
+    """Return weekly new-subscriber counts for the "boost" list as chart points.
+
+    Subscriptions dated from ``start_date`` through ``end_date`` (both
+    inclusive) are grouped by ISO year and week. Every week touched by the
+    period is present in the result, with a count of 0 where nothing was
+    recorded, and the points are in chronological order.
+
+    Weeks are tracked as ``(iso_year, week)`` pairs so that a period spanning
+    a year boundary is still filled correctly; a plain range over week numbers
+    is empty when the start week is greater than the end week.
+
+    Returns:
+        A list of ``{"x": week_number, "y": count}`` dicts.
+    """
+    # Local import: the module header only imports ``datetime`` itself.
+    from datetime import timedelta
+
+    rows = (
+        SubscriptionData.objects.filter(
+            subscription_dt__gte=start_date,
+            subscription_dt__lte=end_date,
+            list="boost",
+        )
+        .annotate(
+            week=ExtractWeek("subscription_dt"),
+            iso_year=ExtractIsoYear("subscription_dt"),
+        )
+        .values("iso_year", "week")
+        .annotate(count=Count("id"))
+        .order_by("iso_year", "week")
+    )
+    counts = {(row["iso_year"], row["week"]): row["count"] for row in rows}
+
+    # Start from the weeks that have data, then add every week in the period.
+    weeks = set(counts)
+    if start_date <= end_date:
+        day = start_date
+        while day <= end_date:
+            iso = day.isocalendar()
+            weeks.add((iso.year, iso.week))
+            day += timedelta(days=7)
+        # Stepping by seven days can stop short of the week holding end_date.
+        iso = end_date.isocalendar()
+        weeks.add((iso.year, iso.week))
+
+    return [
+        {"x": week, "y": counts.get((iso_year, week), 0)}
+        for iso_year, week in sorted(weeks)
+    ]
diff --git a/requirements.in b/requirements.in
index 0a9c964e..363a21e6 100644
--- a/requirements.in
+++ b/requirements.in
@@ -31,6 +31,7 @@ wheel
cryptography
boto3
jsoncomment
+unidecode
wordcloud
# Logging
diff --git a/requirements.txt b/requirements.txt
index 54235d6e..acdb7513 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -371,6 +371,8 @@ tzdata==2024.2
# via
# celery
# kombu
+unidecode==1.3.8
+ # via -r ./requirements.in
urllib3==1.26.20
# via
# botocore
diff --git a/templates/admin/mailinglist_change_list.html b/templates/admin/mailinglist_change_list.html
index 744589d3..62623527 100644
--- a/templates/admin/mailinglist_change_list.html
+++ b/templates/admin/mailinglist_change_list.html
@@ -1,11 +1,13 @@
-{% extends "admin/change_list.html" %}
+{% extends 'admin/change_list.html' %}
{% load i18n admin_urls %}
{% block object-tools %}
-
+
+
{% endblock %}
diff --git a/templates/admin/mailinglist_subscribe_csv_form.html b/templates/admin/mailinglist_subscribe_csv_form.html
new file mode 100644
index 00000000..c9e7d093
--- /dev/null
+++ b/templates/admin/mailinglist_subscribe_csv_form.html
@@ -0,0 +1,15 @@
+{% extends "admin/base_site.html" %}
+{% load static %}
+{% block content %}
+ {{ block.super }}
+
+
Upload Subscribe File
+
+
+
+
+{% endblock content %}
diff --git a/templates/admin/release_report_detail.html b/templates/admin/release_report_detail.html
index 5929921c..20327ba9 100644
--- a/templates/admin/release_report_detail.html
+++ b/templates/admin/release_report_detail.html
@@ -30,6 +30,14 @@ body {
.committee_members img {
filter: grayscale(1);
}
+#top-committed-libraries-chart .apexcharts-xaxis-label:nth-child(odd) {
+ transform: translateY(-8px);
+}
+
+#top-committed-libraries-chart .apexcharts-xaxis-label:nth-child(even) {
+ transform: translateY(8px);
+}
+
{% endblock css %}
{% block content %}
@@ -219,7 +227,7 @@ body {
-
Top Contributors
+
Top Contributors
{% for item in mailinglist_counts %}
@@ -249,21 +257,22 @@ body {
poster{{ mailinglist_contributor_release_count|pluralize }}
in this version. ({{ mailinglist_contributor_new_count }} New)
+
Weekly mailing list posts from {{prior_version.release_date}} to {{version.release_date}} on the Boost Developers mailing list.
+
- {% if wordcloud_base64 %}
-
-
-
Mailing List Word Cloud
-
-

-
-
+
+
+
Mailing List New Subscribers
+
Mailing list new subscribers from {{prior_version.release_date}} to {{version.release_date}} on the Boost Developers mailing list.
+
- {% endif %}
+
+
+
Mailing List Top 200 Most Frequently Used Words
@@ -276,6 +285,16 @@ body {
+ {% if wordcloud_base64 %}
+
+
+
Mailing List Word Cloud
+
+

+
+
+
+ {% endif %}
{% if slack %}
{% for slack_channel_group in slack_channels %}
@@ -441,11 +460,7 @@ body {
{% endwith %}
-