Correct slack activity miscount from fetch_slack_activity (#2056)

This commit is contained in:
daveoconnor
2026-01-12 14:46:49 -08:00
committed by GitHub
parent d24e46ac12
commit 64e113835f
7 changed files with 134 additions and 9 deletions

View File

@@ -0,0 +1,51 @@
import logging
import djclick as click
from django.db import transaction
from slack.models import (
SlackActivityBucket,
Channel,
ChannelUpdateGap,
)
logger = logging.getLogger(__name__)
@click.command()
@click.option(
    "--confirm",
    is_flag=True,
    help="Confirm deletion of all slack activity data.",
)
def command(confirm):
    """
    Delete all records in SlackActivityBucket and ChannelUpdateGap tables,
    and set last_update_ts to "0" for all Channels.
    WARNING: This will delete all slack activity tracking data and reset
    all channels to their initial state. Use with caution.
    """
    # NOTE: the docstring above doubles as the command's --help text (click
    # convention), so it is user-facing and should be edited with care.

    # Safety latch: without the explicit flag nothing is touched.
    if not confirm:
        refusal = (
            "This command will delete ALL slack activity data. "
            "Use --confirm flag to proceed."
        )
        logger.error(refusal)
        return

    # Everything below runs in one transaction so a failure part-way through
    # rolls back the deletions and the channel reset together.
    with transaction.atomic():
        # Row counts are gathered up front purely for the log messages.
        bucket_total = SlackActivityBucket.objects.count()
        gap_total = ChannelUpdateGap.objects.count()
        channel_total = Channel.objects.count()

        all_buckets = SlackActivityBucket.objects.all()
        logger.info(f"Deleting {bucket_total:,} SlackActivityBucket records...")
        all_buckets.delete()

        all_gaps = ChannelUpdateGap.objects.all()
        logger.info(f"Deleting {gap_total:,} ChannelUpdateGap records...")
        all_gaps.delete()

        # Channels are kept; only their last_update_ts is rewound to "0".
        all_channels = Channel.objects.all()
        logger.info(f"Resetting last_update_ts for {channel_total:,} Channels...")
        all_channels.update(last_update_ts="0")

    logger.info("Successfully cleared all slack activity data.")

View File

@@ -51,6 +51,8 @@ def channel_messages_in_range(channel, oldest, latest):
inclusive=False,
)
for page in pages:
# rate-limit to prevent 429 responses
time.sleep(1)
yield page["messages"]
@@ -108,22 +110,31 @@ def fill_channel_gap(gap: ChannelUpdateGap, debug: bool):
logger.info(
f"Fetching channel history for {gap.channel.name} ({gap.channel.id}) "
f"in range ({gap.oldest_message_ts}, {gap.newest_message_ts})"
f"({parse_ts(gap.oldest_message_ts)}Z to {parse_ts(gap.oldest_message_ts)}Z)"
)
pages = channel_messages_in_range(
channel=gap.channel.id,
latest=gap.newest_message_ts,
oldest=gap.oldest_message_ts,
)
first = True
# pages contain a grouping of 100 messages, oldest 100 returned first
for page in pages:
# use a separate transaction per page to allow restoring from an
# interrupted run.
with transaction.atomic():
# messages within a page of 100 however are newest first, so we need to update the channel on the first
# message to have the future ranges retrieved without overlap
first = True
for message in page:
if first and gap.newest_message_ts is None:
readable_dt = parse_ts(message["ts"])
if first:
gap.channel.last_update_ts = message["ts"]
msg = f"saving {readable_dt}Z as last_update_ts for channel"
logger.debug(msg)
gap.channel.save()
first = False
logger.debug(f"next message ts {readable_dt}Z")
# Shrink the gap, but no need to save until we've finished this
# page (transactionally).
gap.newest_message_ts = message["ts"]