Correct slack activity miscount from fetch_slack_activity (#2056)

This commit is contained in:
daveoconnor
2026-01-12 14:46:49 -08:00
committed by GitHub
parent d24e46ac12
commit 64e113835f
7 changed files with 134 additions and 9 deletions

View File

@@ -16,6 +16,7 @@
- [`update_library_version_dependencies`](#update_library_version_dependencies) - [`update_library_version_dependencies`](#update_library_version_dependencies)
- [`release_tasks`](#release_tasks) - [`release_tasks`](#release_tasks)
- [`refresh_users_github_photos`](#refresh_users_github_photos) - [`refresh_users_github_photos`](#refresh_users_github_photos)
- [`clear_slack_activity`](#clear_slack_activity)
## `boost_setup` ## `boost_setup`
@@ -356,3 +357,36 @@ Preview which users would be updated:
- Calls the `refresh_users_github_photos()` Celery task which queues photo updates for all users with GitHub usernames - Calls the `refresh_users_github_photos()` Celery task which queues photo updates for all users with GitHub usernames
- With `--dry-run`, displays information about which users would be updated without making any changes - With `--dry-run`, displays information about which users would be updated without making any changes
## `clear_slack_activity`
**Purpose**: Delete all Slack activity tracking data from the database. This command removes all records from the `SlackActivityBucket` and `ChannelUpdateGap` tables, and resets the `last_update_ts` field to "0" for all channels. This is useful for resetting the Slack activity tracking system to its initial state.
**Example**
```bash
./manage.py clear_slack_activity --confirm
```
**Options**
| Options | Format | Description |
|--------------|--------|----------------------------------------------------------------------------------------------|
| `--confirm` | bool | Required flag to confirm deletion. The command will not execute without this flag. |
**Usage Examples**
Execute the deletion:
```bash
./manage.py clear_slack_activity --confirm
```
**Process**
- Deletes all `SlackActivityBucket` records (message counts per user per channel per day)
- Deletes all `ChannelUpdateGap` records (tracking of message fetch progress)
- Resets `last_update_ts` to "0" for all `Channel` records
- All operations are performed within a database transaction to ensure atomicity
- Logs the number of records affected in each table
**Warning**: This command permanently deletes all Slack activity data. Use with caution.

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.2.8 on 2026-01-06 01:11
from django.db import migrations, models
class Migration(migrations.Migration):
    """Register the ``ListPosting`` model in the ``mailing_list`` app state.

    The model is declared with ``managed=False`` and mapped onto the
    ``hyperkitty_email`` table, so this migration only records the model in
    Django's migration state; Django does not create, alter, or drop the
    underlying table for unmanaged models.
    """

    # Must be applied after the previous mailing_list migration.
    dependencies = [
        ("mailing_list", "0005_postingdata_subscriptiondata"),
    ]

    operations = [
        migrations.CreateModel(
            name="ListPosting",
            fields=[
                # Plain integer primary key (not auto-incrementing).
                ("id", models.IntegerField(primary_key=True, serialize=False)),
                ("date", models.DateTimeField()),
                # Declared without max_length.
                # NOTE(review): presumably relies on a database backend that
                # supports unbounded varchar columns — confirm.
                ("sender_id", models.CharField()),
            ],
            options={
                # NOTE(review): the table name suggests it is owned by
                # HyperKitty rather than this project — confirm.
                "db_table": "hyperkitty_email",
                "managed": False,
            },
        ),
    ]

View File

@@ -1,8 +1,12 @@
def set_trace():
    """Attach this process to a PyCharm debug server listening on the host.

    The host address is taken from the container's default route in
    ``/proc/net/route`` (the row whose destination is ``00000000``). If that
    file cannot be read or contains no default route, the previously
    hard-coded gateway address ``172.17.0.1`` is used instead, so the
    function no longer fails with an unbound variable before reaching
    ``settrace``.

    Imports are kept local so that ``pydevd_pycharm`` (a development-only
    dependency) is required only when a trace is actually requested.
    """
    import socket
    import struct

    import pydevd_pycharm

    # Fallback: the gateway address this helper used before route discovery
    # was introduced.
    gateway_ip = "172.17.0.1"

    try:
        with open("/proc/net/route") as route_table:
            # The first line is the column header row.
            next(route_table, None)
            for row in route_table:
                columns = row.split()
                # Column 1 is the destination and column 2 the gateway; an
                # all-zero destination marks the default route.
                if len(columns) > 2 and columns[1] == "00000000":
                    # The gateway is a hex-encoded 32-bit value; it is
                    # unpacked as little-endian, as in the original code.
                    gateway_ip = socket.inet_ntoa(
                        struct.pack("<L", int(columns[2], 16))
                    )
                    break
    except OSError:
        # No readable routing table: keep the fallback address rather than
        # abort the debugging session.
        pass

    # Use the same port number configured in PyCharm.
    pydevd_pycharm.settrace(host=gateway_ip, port=12345, suspend=False)

View File

@@ -1,3 +1,3 @@
-c requirements.txt -c requirements.txt
django-debug-toolbar django-debug-toolbar
pydevd-pycharm==252.27397.106 # pinned to appropriate version for current pycharm pydevd-pycharm==253.29346.142 # pinned to appropriate version for current pycharm

View File

@@ -10,7 +10,7 @@ django==5.2.8
# django-debug-toolbar # django-debug-toolbar
django-debug-toolbar==6.1.0 django-debug-toolbar==6.1.0
# via -r ./requirements-dev.in # via -r ./requirements-dev.in
pydevd-pycharm==252.27397.106 pydevd-pycharm==253.29346.142
# via -r ./requirements-dev.in # via -r ./requirements-dev.in
sqlparse==0.5.3 sqlparse==0.5.3
# via # via

View File

@@ -0,0 +1,51 @@
import logging
import djclick as click
from django.db import transaction
from slack.models import (
SlackActivityBucket,
Channel,
ChannelUpdateGap,
)
logger = logging.getLogger(__name__)
@click.command()
@click.option(
    "--confirm",
    is_flag=True,
    help="Confirm deletion of all slack activity data.",
)
def command(confirm):
    """
    Delete all records in SlackActivityBucket and ChannelUpdateGap tables,
    and set last_update_ts to "0" for all Channels.
    WARNING: This will delete all slack activity tracking data and reset
    all channels to their initial state. Use with caution.
    """
    # The docstring above is also the command's --help text, so it is kept
    # verbatim.

    # Guard: refuse to touch any data unless the caller opted in explicitly.
    if not confirm:
        refusal = (
            "This command will delete ALL slack activity data. "
            "Use --confirm flag to proceed."
        )
        logger.error(refusal)
        return

    # Everything below happens in a single transaction, so a failure part-way
    # through leaves the tables untouched.
    with transaction.atomic():
        # Take all three counts up front, before any row is modified, so the
        # log lines report the pre-deletion totals.
        bucket_total = SlackActivityBucket.objects.count()
        gap_total = ChannelUpdateGap.objects.count()
        channel_total = Channel.objects.count()

        logger.info(f"Deleting {bucket_total:,} SlackActivityBucket records...")
        SlackActivityBucket.objects.all().delete()

        logger.info(f"Deleting {gap_total:,} ChannelUpdateGap records...")
        ChannelUpdateGap.objects.all().delete()

        # Channels are kept; only their fetch cursor is reset to "0".
        logger.info(f"Resetting last_update_ts for {channel_total:,} Channels...")
        Channel.objects.all().update(last_update_ts="0")

    logger.info("Successfully cleared all slack activity data.")

View File

@@ -51,6 +51,8 @@ def channel_messages_in_range(channel, oldest, latest):
inclusive=False, inclusive=False,
) )
for page in pages: for page in pages:
# rate-limit to prevent 429 responses
time.sleep(1)
yield page["messages"] yield page["messages"]
@@ -108,22 +110,31 @@ def fill_channel_gap(gap: ChannelUpdateGap, debug: bool):
logger.info( logger.info(
f"Fetching channel history for {gap.channel.name} ({gap.channel.id}) " f"Fetching channel history for {gap.channel.name} ({gap.channel.id}) "
f"in range ({gap.oldest_message_ts}, {gap.newest_message_ts})" f"in range ({gap.oldest_message_ts}, {gap.newest_message_ts})"
f"({parse_ts(gap.oldest_message_ts)}Z to {parse_ts(gap.oldest_message_ts)}Z)"
) )
pages = channel_messages_in_range( pages = channel_messages_in_range(
channel=gap.channel.id, channel=gap.channel.id,
latest=gap.newest_message_ts, latest=gap.newest_message_ts,
oldest=gap.oldest_message_ts, oldest=gap.oldest_message_ts,
) )
first = True
# pages contain a grouping of 100 messages, oldest 100 returned first
for page in pages: for page in pages:
# use a separate transaction per page to allow restoring from an # use a separate transaction per page to allow restoring from an
# interrupted run. # interrupted run.
with transaction.atomic(): with transaction.atomic():
# messages within a page of 100 however are newest first, so we need to update the channel on the first
# message to have the future ranges retrieved without overlap
first = True
for message in page: for message in page:
if first and gap.newest_message_ts is None: readable_dt = parse_ts(message["ts"])
if first:
gap.channel.last_update_ts = message["ts"] gap.channel.last_update_ts = message["ts"]
msg = f"saving {readable_dt}Z as last_update_ts for channel"
logger.debug(msg)
gap.channel.save() gap.channel.save()
first = False first = False
logger.debug(f"next message ts {readable_dt}Z")
# Shrink the gap, but no need to save until we've finished this # Shrink the gap, but no need to save until we've finished this
# page (transactionally). # page (transactionally).
gap.newest_message_ts = message["ts"] gap.newest_message_ts = message["ts"]