mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Updated import_ml_counts to pull data from hyperkitty db (#2054)
This commit is contained in:
1
.github/workflows/actions-gcp.yaml
vendored
1
.github/workflows/actions-gcp.yaml
vendored
@@ -68,6 +68,7 @@ jobs:
|
|||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
env:
|
env:
|
||||||
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
||||||
|
HYPERKITTY_DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/lists_production_web"
|
||||||
SECRET_KEY: "for-testing-only"
|
SECRET_KEY: "for-testing-only"
|
||||||
REDIS_HOST: "localhost"
|
REDIS_HOST: "localhost"
|
||||||
CI: "true"
|
CI: "true"
|
||||||
|
|||||||
1
.github/workflows/actions.yml
vendored
1
.github/workflows/actions.yml
vendored
@@ -58,6 +58,7 @@ jobs:
|
|||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
env:
|
env:
|
||||||
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
||||||
|
HYPERKITTY_DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/lists_production_web"
|
||||||
SECRET_KEY: "for-testing-only"
|
SECRET_KEY: "for-testing-only"
|
||||||
REDIS_HOST: "localhost"
|
REDIS_HOST: "localhost"
|
||||||
CI: "true"
|
CI: "true"
|
||||||
|
|||||||
@@ -189,7 +189,10 @@ WSGI_APPLICATION = "config.wsgi.application"
|
|||||||
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
|
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
|
||||||
|
|
||||||
try:
|
try:
|
||||||
DATABASES = {"default": env.dj_db_url("DATABASE_URL")}
|
DATABASES = {
|
||||||
|
"default": env.dj_db_url("DATABASE_URL"),
|
||||||
|
"hyperkitty": env.dj_db_url("HYPERKITTY_DATABASE_URL"),
|
||||||
|
}
|
||||||
except (ImproperlyConfigured, environs.EnvError):
|
except (ImproperlyConfigured, environs.EnvError):
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
"default": {
|
"default": {
|
||||||
@@ -201,7 +204,17 @@ except (ImproperlyConfigured, environs.EnvError):
|
|||||||
"USER": env("PGUSER"),
|
"USER": env("PGUSER"),
|
||||||
"CONN_MAX_AGE": 0,
|
"CONN_MAX_AGE": 0,
|
||||||
"OPTIONS": {"MAX_CONNS": env("MAX_CONNECTIONS", default=20)},
|
"OPTIONS": {"MAX_CONNS": env("MAX_CONNECTIONS", default=20)},
|
||||||
}
|
},
|
||||||
|
"hyperkitty": {
|
||||||
|
"ENGINE": "django_db_geventpool.backends.postgresql_psycopg2",
|
||||||
|
"HOST": env("PGHOST"),
|
||||||
|
"NAME": env("HYPERKITTY_DATABASE_NAME", default=""),
|
||||||
|
"PASSWORD": env("PGPASSWORD"),
|
||||||
|
"PORT": env.int("PGPORT", default=5432),
|
||||||
|
"USER": env("PGUSER"),
|
||||||
|
"CONN_MAX_AGE": 0,
|
||||||
|
"OPTIONS": {"MAX_CONNS": env("MAX_CONNECTIONS", default=20)},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
# Password validation
|
# Password validation
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ PROD_MEDIA_CONTENT_AWS_S3_ENDPOINT_URL=$STATIC_CONTENT_AWS_S3_ENDPOINT_URL
|
|||||||
# Mailman database settings
|
# Mailman database settings
|
||||||
HYPERKITTY_DATABASE_NAME="lists_production_web"
|
HYPERKITTY_DATABASE_NAME="lists_production_web"
|
||||||
DATABASE_URL="postgresql://postgres@db:5432/postgres"
|
DATABASE_URL="postgresql://postgres@db:5432/postgres"
|
||||||
|
HYPERKITTY_DATABASE_URL="postgresql://postgres@db:5432/lists_production_web"
|
||||||
DATABASE_TYPE="postgres"
|
DATABASE_TYPE="postgres"
|
||||||
DATABASE_CLASS="mailman.database.postgresql.PostgreSQLDatabase"
|
DATABASE_CLASS="mailman.database.postgresql.PostgreSQLDatabase"
|
||||||
HYPERKITTY_API_KEY="changeme!"
|
HYPERKITTY_API_KEY="changeme!"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from django.http import HttpResponseRedirect
|
|||||||
from django.contrib import admin, messages
|
from django.contrib import admin, messages
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from mailing_list.models import EmailData, SubscriptionData
|
from mailing_list.models import EmailData, SubscriptionData, ListPosting
|
||||||
from mailing_list.tasks import sync_mailinglist_stats
|
from mailing_list.tasks import sync_mailinglist_stats
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -112,3 +112,18 @@ class SubscriptionDataAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
payload = {"form": SubscribesCSVForm()}
|
payload = {"form": SubscribesCSVForm()}
|
||||||
return render(request, "admin/mailinglist_subscribe_csv_form.html", payload)
|
return render(request, "admin/mailinglist_subscribe_csv_form.html", payload)
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(ListPosting)
|
||||||
|
class ListPostingAdmin(admin.ModelAdmin):
|
||||||
|
list_display = ["id", "date", "sender_id"]
|
||||||
|
search_fields = ["sender_id"]
|
||||||
|
|
||||||
|
def has_add_permission(self, request):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def has_change_permission(self, request, obj=None):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def has_delete_permission(self, request, obj=None):
|
||||||
|
return False
|
||||||
|
|||||||
@@ -6,23 +6,15 @@
|
|||||||
import djclick as click
|
import djclick as click
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import warnings
|
|
||||||
from datetime import timedelta, datetime
|
from datetime import timedelta, datetime
|
||||||
import html
|
|
||||||
|
|
||||||
from dateutil.relativedelta import relativedelta
|
|
||||||
from unidecode import unidecode
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from mailing_list.constants import (
|
from mailing_list.constants import (
|
||||||
ML_STATS_URLS,
|
|
||||||
LATIN_1_EQUIVS,
|
|
||||||
ARG_DATE_REGEX,
|
ARG_DATE_REGEX,
|
||||||
AUTHOR_PATTERN_REGEX,
|
AUTHOR_PATTERN_REGEX,
|
||||||
DATE_PATTERN_REGEX,
|
DATE_PATTERN_REGEX,
|
||||||
)
|
)
|
||||||
from mailing_list.models import PostingData
|
from mailing_list.models import PostingData, ListPosting
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -31,18 +23,6 @@ author_pattern = re.compile(AUTHOR_PATTERN_REGEX)
|
|||||||
date_pattern = re.compile(DATE_PATTERN_REGEX)
|
date_pattern = re.compile(DATE_PATTERN_REGEX)
|
||||||
|
|
||||||
|
|
||||||
def decode_broken_html(str):
|
|
||||||
def latin_1_ord(char):
|
|
||||||
n = ord(char)
|
|
||||||
return LATIN_1_EQUIVS.get(n, n)
|
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
return unidecode(
|
|
||||||
bytearray(map(latin_1_ord, html.unescape(str))).decode("utf-8", "ignore")
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_datetime(date_str: str, is_start: bool) -> datetime:
|
def parse_datetime(date_str: str, is_start: bool) -> datetime:
|
||||||
"""
|
"""
|
||||||
Parse a date string (YYYY, YYYY-MM, YYYY-MM-DD) into a datetime object.
|
Parse a date string (YYYY, YYYY-MM, YYYY-MM-DD) into a datetime object.
|
||||||
@@ -75,41 +55,12 @@ def parse_datetime(date_str: str, is_start: bool) -> datetime:
|
|||||||
return datetime(year, month, day, 23, 59, 59)
|
return datetime(year, month, day, 23, 59, 59)
|
||||||
|
|
||||||
|
|
||||||
def retrieve_authors_from_ml(url, start_date, end_date):
|
|
||||||
posts = []
|
|
||||||
logger.info(f"Retrieving data from {url=}.")
|
|
||||||
r = requests.get(url)
|
|
||||||
if r.status_code == 404:
|
|
||||||
return posts
|
|
||||||
|
|
||||||
author = None
|
|
||||||
for line in r.text.splitlines():
|
|
||||||
author_match = author_pattern.match(line)
|
|
||||||
if author_match:
|
|
||||||
# needs multiple passes to work
|
|
||||||
author = decode_broken_html(author_match.group(1))
|
|
||||||
else:
|
|
||||||
date_pattern_match = date_pattern.match(line)
|
|
||||||
if author and date_pattern_match:
|
|
||||||
post_date = datetime.strptime(
|
|
||||||
date_pattern_match.group(1), "%Y-%m-%d %H:%M:%S"
|
|
||||||
)
|
|
||||||
if start_date <= post_date and post_date <= end_date:
|
|
||||||
posts.append(PostingData(name=author, post_time=post_date))
|
|
||||||
return posts
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_authors(start_date, end_date):
|
def retrieve_authors(start_date, end_date):
|
||||||
logger.info(f"Retrieve_authors from {start_date:%Y-%m-%d} to {end_date:%Y-%m-%d}")
|
logger.info(f"Retrieve_authors from {start_date:%Y-%m-%d} to {end_date:%Y-%m-%d}")
|
||||||
start_month = datetime(start_date.year, start_date.month, 1)
|
|
||||||
end_month = datetime(end_date.year, end_date.month, 1)
|
|
||||||
authors = []
|
authors = []
|
||||||
while start_month <= end_month:
|
for p in ListPosting.objects.filter(date__gte=start_date, date__lte=end_date):
|
||||||
for ml in ML_STATS_URLS:
|
authors.append(PostingData(name=p.sender_id, post_time=p.date))
|
||||||
authors += retrieve_authors_from_ml(
|
|
||||||
ml.format(start_month.year, start_month.month), start_date, end_date
|
|
||||||
)
|
|
||||||
start_month = start_month + relativedelta(months=+1)
|
|
||||||
PostingData.objects.filter(
|
PostingData.objects.filter(
|
||||||
post_time__gte=start_date, post_time__lte=end_date
|
post_time__gte=start_date, post_time__lte=end_date
|
||||||
).delete()
|
).delete()
|
||||||
|
|||||||
@@ -57,3 +57,20 @@ class SubscriptionData(models.Model):
|
|||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
unique_together = ["subscription_dt", "email", "list"]
|
unique_together = ["subscription_dt", "email", "list"]
|
||||||
|
|
||||||
|
|
||||||
|
class ListPostingManager(models.Manager):
|
||||||
|
def get_queryset(self):
|
||||||
|
return super().get_queryset().using("hyperkitty")
|
||||||
|
|
||||||
|
|
||||||
|
class ListPosting(models.Model):
|
||||||
|
id = models.IntegerField(primary_key=True, blank=False, null=False)
|
||||||
|
date = models.DateTimeField(blank=False, null=False)
|
||||||
|
sender_id = models.CharField(blank=False, null=False)
|
||||||
|
|
||||||
|
objects = ListPostingManager()
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
managed = False
|
||||||
|
db_table = "hyperkitty_email"
|
||||||
|
|||||||
Reference in New Issue
Block a user