mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Updated import_ml_counts to pull data from hyperkitty db (#2054)
This commit is contained in:
1
.github/workflows/actions-gcp.yaml
vendored
1
.github/workflows/actions-gcp.yaml
vendored
@@ -68,6 +68,7 @@ jobs:
|
||||
- name: Test with pytest
|
||||
env:
|
||||
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
||||
HYPERKITTY_DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/lists_production_web"
|
||||
SECRET_KEY: "for-testing-only"
|
||||
REDIS_HOST: "localhost"
|
||||
CI: "true"
|
||||
|
||||
1
.github/workflows/actions.yml
vendored
1
.github/workflows/actions.yml
vendored
@@ -58,6 +58,7 @@ jobs:
|
||||
- name: Test with pytest
|
||||
env:
|
||||
DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres"
|
||||
HYPERKITTY_DATABASE_URL: "postgres://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/lists_production_web"
|
||||
SECRET_KEY: "for-testing-only"
|
||||
REDIS_HOST: "localhost"
|
||||
CI: "true"
|
||||
|
||||
@@ -189,7 +189,10 @@ WSGI_APPLICATION = "config.wsgi.application"
|
||||
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
|
||||
|
||||
try:
|
||||
DATABASES = {"default": env.dj_db_url("DATABASE_URL")}
|
||||
DATABASES = {
|
||||
"default": env.dj_db_url("DATABASE_URL"),
|
||||
"hyperkitty": env.dj_db_url("HYPERKITTY_DATABASE_URL"),
|
||||
}
|
||||
except (ImproperlyConfigured, environs.EnvError):
|
||||
DATABASES = {
|
||||
"default": {
|
||||
@@ -201,7 +204,17 @@ except (ImproperlyConfigured, environs.EnvError):
|
||||
"USER": env("PGUSER"),
|
||||
"CONN_MAX_AGE": 0,
|
||||
"OPTIONS": {"MAX_CONNS": env("MAX_CONNECTIONS", default=20)},
|
||||
}
|
||||
},
|
||||
"hyperkitty": {
|
||||
"ENGINE": "django_db_geventpool.backends.postgresql_psycopg2",
|
||||
"HOST": env("PGHOST"),
|
||||
"NAME": env("HYPERKITTY_DATABASE_NAME", default=""),
|
||||
"PASSWORD": env("PGPASSWORD"),
|
||||
"PORT": env.int("PGPORT", default=5432),
|
||||
"USER": env("PGUSER"),
|
||||
"CONN_MAX_AGE": 0,
|
||||
"OPTIONS": {"MAX_CONNS": env("MAX_CONNECTIONS", default=20)},
|
||||
},
|
||||
}
|
||||
|
||||
# Password validation
|
||||
|
||||
@@ -35,6 +35,7 @@ PROD_MEDIA_CONTENT_AWS_S3_ENDPOINT_URL=$STATIC_CONTENT_AWS_S3_ENDPOINT_URL
|
||||
# Mailman database settings
|
||||
HYPERKITTY_DATABASE_NAME="lists_production_web"
|
||||
DATABASE_URL="postgresql://postgres@db:5432/postgres"
|
||||
HYPERKITTY_DATABASE_URL="postgresql://postgres@db:5432/lists_production_web"
|
||||
DATABASE_TYPE="postgres"
|
||||
DATABASE_CLASS="mailman.database.postgresql.PostgreSQLDatabase"
|
||||
HYPERKITTY_API_KEY="changeme!"
|
||||
|
||||
@@ -11,7 +11,7 @@ from django.http import HttpResponseRedirect
|
||||
from django.contrib import admin, messages
|
||||
from django.conf import settings
|
||||
|
||||
from mailing_list.models import EmailData, SubscriptionData
|
||||
from mailing_list.models import EmailData, SubscriptionData, ListPosting
|
||||
from mailing_list.tasks import sync_mailinglist_stats
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -112,3 +112,18 @@ class SubscriptionDataAdmin(admin.ModelAdmin):
|
||||
|
||||
payload = {"form": SubscribesCSVForm()}
|
||||
return render(request, "admin/mailinglist_subscribe_csv_form.html", payload)
|
||||
|
||||
|
||||
@admin.register(ListPosting)
class ListPostingAdmin(admin.ModelAdmin):
    """Browse-only admin for ``ListPosting`` rows.

    Every add/change/delete permission hook answers ``False``, so the admin
    can list and search postings but never modify them.
    """

    search_fields = ["sender_id"]
    list_display = ["id", "date", "sender_id"]

    def has_delete_permission(self, request, obj=None):
        """Refuse deletion for every request and object."""
        return False

    def has_change_permission(self, request, obj=None):
        """Refuse edits for every request and object."""
        return False

    def has_add_permission(self, request):
        """Refuse creation of new rows for every request."""
        return False
|
||||
|
||||
@@ -6,23 +6,15 @@
|
||||
import djclick as click
|
||||
import logging
|
||||
import re
|
||||
import warnings
|
||||
from datetime import timedelta, datetime
|
||||
import html
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from unidecode import unidecode
|
||||
|
||||
import requests
|
||||
|
||||
from mailing_list.constants import (
|
||||
ML_STATS_URLS,
|
||||
LATIN_1_EQUIVS,
|
||||
ARG_DATE_REGEX,
|
||||
AUTHOR_PATTERN_REGEX,
|
||||
DATE_PATTERN_REGEX,
|
||||
)
|
||||
from mailing_list.models import PostingData
|
||||
from mailing_list.models import PostingData, ListPosting
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -31,18 +23,6 @@ author_pattern = re.compile(AUTHOR_PATTERN_REGEX)
|
||||
date_pattern = re.compile(DATE_PATTERN_REGEX)
|
||||
|
||||
|
||||
def decode_broken_html(str):
    """Turn an HTML-escaped, mis-encoded string into plain ASCII-ish text.

    Steps, in order:
      1. HTML entities are unescaped.
      2. Each character's code point is looked up in ``LATIN_1_EQUIVS``;
         code points without an entry are kept unchanged.
      3. The resulting integers are packed into bytes and decoded as UTF-8,
         silently dropping undecodable sequences.
      4. The decoded text is passed through ``unidecode``.

    All warnings raised while doing so are suppressed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        unescaped = html.unescape(str)
        # Map every code point through the lookup table, falling back to the
        # code point itself when the table has no replacement for it.
        remapped = bytearray(
            LATIN_1_EQUIVS.get(code_point, code_point)
            for code_point in (ord(char) for char in unescaped)
        )
        return unidecode(remapped.decode("utf-8", "ignore"))
|
||||
|
||||
|
||||
def parse_datetime(date_str: str, is_start: bool) -> datetime:
|
||||
"""
|
||||
Parse a date string (YYYY, YYYY-MM, YYYY-MM-DD) into a datetime object.
|
||||
@@ -75,41 +55,12 @@ def parse_datetime(date_str: str, is_start: bool) -> datetime:
|
||||
return datetime(year, month, day, 23, 59, 59)
|
||||
|
||||
|
||||
def retrieve_authors_from_ml(url, start_date, end_date, timeout=30):
    """Download one mailing-list page and collect the postings it lists.

    The response body is scanned line by line. A line matching
    ``author_pattern`` sets the current author; each following line matching
    ``date_pattern`` is recorded as a posting by that author when its
    timestamp lies within ``[start_date, end_date]`` (both ends inclusive).

    Args:
        url: Address of the page to download.
        start_date: Earliest ``datetime`` to accept.
        end_date: Latest ``datetime`` to accept.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a stalled server would block the caller indefinitely.

    Returns:
        A list of unsaved ``PostingData`` instances; empty when the server
        answers 404.

    Raises:
        requests.RequestException: on connection failures or timeouts.
    """
    logger.info(f"Retrieving data from {url=}.")
    response = requests.get(url, timeout=timeout)
    # Only 404 is treated as "no data"; any other status is parsed as-is.
    if response.status_code == 404:
        return []

    posts = []
    author = None
    for line in response.text.splitlines():
        author_match = author_pattern.match(line)
        if author_match:
            # needs multiple passes to work
            author = decode_broken_html(author_match.group(1))
            continue

        date_match = date_pattern.match(line)
        # A date line is only meaningful once an author has been seen.
        if not (author and date_match):
            continue

        post_date = datetime.strptime(date_match.group(1), "%Y-%m-%d %H:%M:%S")
        if start_date <= post_date <= end_date:
            posts.append(PostingData(name=author, post_time=post_date))
    return posts
|
||||
|
||||
|
||||
def retrieve_authors(start_date, end_date):
|
||||
logger.info(f"Retrieve_authors from {start_date:%Y-%m-%d} to {end_date:%Y-%m-%d}")
|
||||
start_month = datetime(start_date.year, start_date.month, 1)
|
||||
end_month = datetime(end_date.year, end_date.month, 1)
|
||||
authors = []
|
||||
while start_month <= end_month:
|
||||
for ml in ML_STATS_URLS:
|
||||
authors += retrieve_authors_from_ml(
|
||||
ml.format(start_month.year, start_month.month), start_date, end_date
|
||||
)
|
||||
start_month = start_month + relativedelta(months=+1)
|
||||
for p in ListPosting.objects.filter(date__gte=start_date, date__lte=end_date):
|
||||
authors.append(PostingData(name=p.sender_id, post_time=p.date))
|
||||
|
||||
PostingData.objects.filter(
|
||||
post_time__gte=start_date, post_time__lte=end_date
|
||||
).delete()
|
||||
|
||||
@@ -57,3 +57,20 @@ class SubscriptionData(models.Model):
|
||||
|
||||
class Meta:
|
||||
unique_together = ["subscription_dt", "email", "list"]
|
||||
|
||||
|
||||
class ListPostingManager(models.Manager):
    """Manager whose querysets are bound to the ``hyperkitty`` database alias."""

    def get_queryset(self):
        """Return the stock queryset, re-targeted at the "hyperkitty" database."""
        base_queryset = super().get_queryset()
        return base_queryset.using("hyperkitty")
|
||||
|
||||
|
||||
class ListPosting(models.Model):
    """Unmanaged model mapped onto the existing ``hyperkitty_email`` table.

    Django does not create or migrate this table (``managed = False``), and
    the default manager is ``ListPostingManager``.
    """

    id = models.IntegerField(
        primary_key=True,
        null=False,
        blank=False,
    )
    date = models.DateTimeField(
        null=False,
        blank=False,
    )
    # NOTE(review): no max_length is given; Django releases before 4.2
    # require one on CharField -- confirm against the project's version.
    sender_id = models.CharField(
        null=False,
        blank=False,
    )

    objects = ListPostingManager()

    class Meta:
        db_table = "hyperkitty_email"
        managed = False
|
||||
|
||||
Reference in New Issue
Block a user