diff --git a/libraries/management/commands/release_tasks.py b/libraries/management/commands/release_tasks.py index 75504742..d58fa8db 100644 --- a/libraries/management/commands/release_tasks.py +++ b/libraries/management/commands/release_tasks.py @@ -1,6 +1,7 @@ import traceback from contextlib import suppress from dataclasses import dataclass +from datetime import timedelta from typing import Callable import djclick as click @@ -82,13 +83,16 @@ class ReleaseTasksManager: ReleaseTask("Updating github issues", ["update_issues"]), ReleaseTask("Updating slack activity buckets", ["fetch_slack_activity"]), ReleaseTask("Updating website statistics", self.update_website_statistics), - ReleaseTask("Importing mailing list counts", ["import_ml_counts"]), + ReleaseTask("Importing mailing list counts", self.import_ml_counts), ReleaseTask("Generating report", self.generate_report), ] def update_release_data(self) -> dict[str:int]: for task in self.tasks: - self.progress_messages.append(progress_message(f"{task.description}...")) + # "Release Task: " prefix for easy log parsing + self.progress_messages.append( + progress_message(f"Release Task: {task.description}...") + ) task.run() self.progress_messages.append( progress_message(f"Finished {task.description.lower()}") @@ -110,6 +114,15 @@ class ReleaseTasksManager: report, _ = WebsiteStatReport.objects.get_or_create(version=self.latest_version) report.populate_from_api() + def import_ml_counts(self): + """Import counts for the last four months. Should be more than enough, + and saves lots of time vs importing all. + """ + start_date = timezone.now() - timedelta(days=120) + date_string = start_date.strftime("%Y-%m-%d") + print(f"{date_string = }") + call_command("import_ml_counts", start_date=date_string) + def generate_report(self): if not self.should_generate_report: self.progress_messages.append( diff --git a/mailing_list/management/commands/import_ml_counts.py b/mailing_list/management/commands/import_ml_counts.py index 6afaaa8f..df930f08 100644 --- a/mailing_list/management/commands/import_ml_counts.py +++ b/mailing_list/management/commands/import_ml_counts.py @@ -43,39 +43,36 @@ def decode_broken_html(str): ) -def parse_start_datetime(date_str): +def parse_datetime(date_str: str, is_start: bool) -> datetime: + """ + Parse a date string (YYYY, YYYY-MM, YYYY-MM-DD) into a datetime object. + + If is_start=True, returns the earliest time possible for the data given. + If is_start=False, returns the latest time possible for the data given. + """ m = arg_date_pattern.match(date_str) if not m: - raise ValueError("wrong date format") - logger.info(f"{m=} {m.group(1)=} {m.group(2)=} {m.group(3)=}") - return datetime( - int(m.group(3)) if m.group(3) else 1, - int(m.group(2)) if m.group(2) else 1, - int(m.group(1)), - 0, - 0, - 0, - ) + raise ValueError(f"Invalid date format: {date_str!r}") + year_text, month_text, day_text = m.groups() + year = int(year_text) + month = int(month_text) if month_text is not None else (1 if is_start else 12) + day = int(day_text) if day_text is not None else 1 -def parse_end_datetime(date_str): - m = arg_date_pattern.match(date_str) - if not m: - raise ValueError("wrong date format") - logger.info(f"{m=} {m.group(1)=} {m.group(2)=} {m.group(3)=}") - if m.group(2): - if m.group(3): - return datetime( - int(m.group(3)), int(m.group(2)), int(m.group(1)), 23, 59, 59 - ) - else: - return ( - datetime(int(m.group(1)), int(m.group(2)), 1) + timedelta(days=31), - 23, - 59, - 59, - ).replace(day=1) - timedelta(days=1) - return datetime(int(m.group(1)), 12, 31, 23, 59, 59) + if is_start: + # Start date - return start of day + return datetime(year, month, day, 0, 0, 0) + + # End date - return latest datetime possible from given criteria + if day_text is None: + # No day provided: find the last day of the month + first_of_next_month = (datetime(year, month, 1) + timedelta(days=31)).replace( + day=1 + ) + last_day_of_month = first_of_next_month - timedelta(days=1) + day = last_day_of_month.day + + return datetime(year, month, day, 23, 59, 59) def retrieve_authors_from_ml(url, start_date, end_date): @@ -103,7 +100,7 @@ def retrieve_authors_from_ml(url, start_date, end_date): def retrieve_authors(start_date, end_date): - logger.info(f"retrieve_authors from {start_date=} to {end_date=}") + logger.info(f"Retrieve_authors from {start_date:%Y-%m-%d} to {end_date:%Y-%m-%d}") start_month = datetime(start_date.year, start_date.month, 1) end_month = datetime(end_date.year, end_date.month, 1) authors = [] @@ -125,9 +122,11 @@ def retrieve_authors(start_date, end_date): def command(start_date, end_date): logger.info(f"Starting import_ml_counts {start_date=} {end_date=}") start_date = ( - parse_start_datetime(start_date) if start_date else datetime(1998, 11, 11) + parse_datetime(start_date, is_start=True) + if start_date + else datetime(1998, 11, 11) ) - logger.info(f"{start_date=}") - end_date = parse_end_datetime(end_date) if end_date else datetime.now() - logger.info(f"{end_date=}") + logger.info(f"{start_date = }") + end_date = parse_datetime(end_date, is_start=False) if end_date else datetime.now() + logger.info(f"{end_date = }") retrieve_authors(start_date, end_date)