mirror of
https://github.com/boostorg/website-v2.git
synced 2026-01-19 04:42:17 +00:00
Download slack activity (#1395)
This commit is contained in:
@@ -37,3 +37,9 @@ def setup_periodic_tasks(sender, **kwargs):
|
||||
crontab(hour=4, minute=5),
|
||||
app.signature("core.tasks.clear_static_content_cache"),
|
||||
)
|
||||
|
||||
# Fetch Slack activity. Executes daily at 3:07 AM.
|
||||
sender.add_periodic_task(
|
||||
crontab(hour=3, minute=7),
|
||||
app.signature("slack.tasks.fetch_slack_activity"),
|
||||
)
|
||||
|
||||
@@ -90,6 +90,7 @@ INSTALLED_APPS += [
|
||||
"mailing_list",
|
||||
"news",
|
||||
"core",
|
||||
"slack",
|
||||
]
|
||||
|
||||
AUTH_USER_MODEL = "users.User"
|
||||
@@ -560,3 +561,5 @@ OAUTH_APP_NAME = (
|
||||
|
||||
# Frame loading
|
||||
X_FRAME_OPTIONS = "SAMEORIGIN"
|
||||
|
||||
SLACK_BOT_TOKEN = env("SLACK_BOT_TOKEN", default="")
|
||||
|
||||
@@ -53,3 +53,5 @@ GITHUB_OAUTH_CLIENT_SECRET=
|
||||
|
||||
GOOGLE_OAUTH_CLIENT_ID=
|
||||
GOOGLE_OAUTH_CLIENT_SECRET=
|
||||
|
||||
SLACK_BOT_TOKEN=changeme
|
||||
|
||||
@@ -204,6 +204,11 @@ Env:
|
||||
secretKeyRef:
|
||||
name: hyperkitty
|
||||
key: database_name
|
||||
- name: SLACK_BOT_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: slack
|
||||
key: slack_bot_token
|
||||
# Static content cache timeout
|
||||
- name: STATIC_CACHE_TIMEOUT
|
||||
value: "60"
|
||||
|
||||
@@ -204,6 +204,11 @@ Env:
|
||||
secretKeyRef:
|
||||
name: hyperkitty
|
||||
key: database_name
|
||||
- name: SLACK_BOT_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: slack
|
||||
key: slack_bot_token
|
||||
# Static content cache timeout
|
||||
- name: STATIC_CACHE_TIMEOUT
|
||||
value: "60"
|
||||
|
||||
@@ -204,6 +204,11 @@ Env:
|
||||
secretKeyRef:
|
||||
name: hyperkitty
|
||||
key: database_name
|
||||
- name: SLACK_BOT_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: slack
|
||||
key: slack_bot_token
|
||||
# Static content cache timeout
|
||||
- name: STATIC_CACHE_TIMEOUT
|
||||
value: "60"
|
||||
|
||||
@@ -184,6 +184,11 @@ Env:
|
||||
secretKeyRef:
|
||||
name: hyperkitty
|
||||
key: database_name
|
||||
- name: SLACK_BOT_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: slack
|
||||
key: slack_bot_token
|
||||
# Static content cache timeout
|
||||
- name: STATIC_CACHE_TIMEOUT
|
||||
value: "60"
|
||||
|
||||
@@ -69,3 +69,4 @@ elasticsearch==7.17.9
|
||||
# Github
|
||||
ghapi
|
||||
requests
|
||||
slack_sdk
|
||||
|
||||
@@ -333,6 +333,8 @@ six==1.16.0
|
||||
# django-rest-auth
|
||||
# fs
|
||||
# python-dateutil
|
||||
slack-sdk==3.33.2
|
||||
# via -r ./requirements.in
|
||||
soupsieve==2.6
|
||||
# via beautifulsoup4
|
||||
sqlparse==0.5.1
|
||||
|
||||
0
slack/__init__.py
Normal file
0
slack/__init__.py
Normal file
6
slack/apps.py
Normal file
6
slack/apps.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class SlackConfig(AppConfig):
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "slack"
|
||||
0
slack/management/__init__.py
Normal file
0
slack/management/__init__.py
Normal file
0
slack/management/commands/__init__.py
Normal file
0
slack/management/commands/__init__.py
Normal file
326
slack/management/commands/fetch_slack_activity.py
Normal file
326
slack/management/commands/fetch_slack_activity.py
Normal file
@@ -0,0 +1,326 @@
|
||||
import logging
|
||||
import datetime
|
||||
import functools
|
||||
|
||||
from slack_sdk import WebClient
|
||||
from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler
|
||||
import djclick as click
|
||||
from django.db import transaction, connection
|
||||
from django.db.models.functions import Now, Cast
|
||||
from django.db.models import Q, FloatField
|
||||
from django.conf import settings
|
||||
from django.core.management import CommandError
|
||||
|
||||
from slack.models import (
|
||||
SlackUser,
|
||||
SlackActivityBucket,
|
||||
Channel,
|
||||
ChannelUpdateGap,
|
||||
Thread,
|
||||
parse_ts,
|
||||
ToTimestamp,
|
||||
)
|
||||
|
||||
|
||||
client = WebClient(token=settings.SLACK_BOT_TOKEN)
|
||||
client.retry_handlers.append(RateLimitErrorRetryHandler(max_retry_count=10))
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_my_channels():
|
||||
for page in client.conversations_list():
|
||||
for channel in page["channels"]:
|
||||
if channel["is_member"]:
|
||||
yield channel
|
||||
|
||||
|
||||
def channel_messages_in_range(channel, oldest, latest):
|
||||
"""
|
||||
All messages in a channel newer than oldest (not inclusive so we don't
|
||||
double count). Returns an iterator over pages, which are iterators over
|
||||
messages. Newest messages come first.
|
||||
"""
|
||||
pages = client.conversations_history(
|
||||
channel=channel,
|
||||
oldest=oldest,
|
||||
latest=latest,
|
||||
inclusive=False,
|
||||
)
|
||||
for page in pages:
|
||||
yield page["messages"]
|
||||
|
||||
|
||||
def thread_messages_newer(channel, thread_ts, oldest):
|
||||
"""
|
||||
All messages in a thread newer than oldest (not inclusive so we don't
|
||||
double count). Returns an iterator over pages. Oldest messages come first.
|
||||
"""
|
||||
pages = client.conversations_replies(
|
||||
channel=channel,
|
||||
ts=thread_ts,
|
||||
oldest=oldest,
|
||||
inclusive=False,
|
||||
)
|
||||
for page in pages:
|
||||
yield page["messages"]
|
||||
|
||||
|
||||
# Track users whose profile information has been updated in our DB and
|
||||
# doesn't need to be checked again.
|
||||
USERS_CACHE = {}
|
||||
|
||||
|
||||
def get_or_create_user(user_id):
|
||||
try:
|
||||
return USERS_CACHE[user_id]
|
||||
except KeyError:
|
||||
# Even if the user exists already in our db, they may have changed
|
||||
# their information in slack so we need to check.
|
||||
user_data = client.users_info(user=user_id)
|
||||
obj, _ = SlackUser.objects.update_or_create(
|
||||
id=user_id,
|
||||
defaults={
|
||||
"name": user_data.data["user"]["name"],
|
||||
"real_name": user_data.data["user"].get("real_name", ""),
|
||||
"email": user_data.data["user"]["profile"].get("email", ""),
|
||||
"image_48": user_data["user"]["profile"].get("image_48", ""),
|
||||
},
|
||||
)
|
||||
USERS_CACHE[user_id] = obj
|
||||
return obj
|
||||
|
||||
|
||||
def should_track_message(message):
|
||||
# These are not regular messages
|
||||
# https://api.slack.com/events/message#subtypes
|
||||
return message.get("subtype") in {None, "me_message"} and "bot_id" not in message
|
||||
|
||||
|
||||
def fill_channel_gap(gap: ChannelUpdateGap, debug: bool):
|
||||
"""
|
||||
Download and process channel messages (not including replies to threads) in
|
||||
the (possibly unbounded) range specified by `gap`.
|
||||
"""
|
||||
logger.info(
|
||||
"Fetching channel history for %r (%r) in range (%s,%s)",
|
||||
gap.channel.name,
|
||||
gap.channel.id,
|
||||
gap.oldest_message_ts,
|
||||
gap.newest_message_ts,
|
||||
)
|
||||
pages = channel_messages_in_range(
|
||||
channel=gap.channel.id,
|
||||
latest=gap.newest_message_ts,
|
||||
oldest=gap.oldest_message_ts,
|
||||
)
|
||||
first = True
|
||||
for page in pages:
|
||||
# use a separate transaction per page to allow restoring from an
|
||||
# interrupted run.
|
||||
with transaction.atomic():
|
||||
for message in page:
|
||||
if first and gap.newest_message_ts is None:
|
||||
gap.channel.last_update_ts = message["ts"]
|
||||
gap.channel.save()
|
||||
first = False
|
||||
# Shrink the gap, but no need to save until we've finished this
|
||||
# page (transactionally).
|
||||
gap.newest_message_ts = message["ts"]
|
||||
|
||||
if not should_track_message(message):
|
||||
continue
|
||||
|
||||
if "user" in message:
|
||||
user = get_or_create_user(message["user"])
|
||||
if debug:
|
||||
gap.channel.seenmessage_set.create(ts=message["ts"])
|
||||
SlackActivityBucket.track_activity(gap.channel, user, message["ts"])
|
||||
|
||||
if message.get("thread_ts"):
|
||||
# Track this thread in the db to be able to check for
|
||||
# updates later.
|
||||
Thread.objects.create(
|
||||
channel=gap.channel,
|
||||
thread_ts=message["thread_ts"],
|
||||
# None indicates that this thread still must be updated
|
||||
# even if it's old.
|
||||
last_update_ts=None,
|
||||
)
|
||||
gap.save()
|
||||
logger.debug(
|
||||
"Channel %r retrieved up to %s (%s)",
|
||||
gap.channel.name,
|
||||
# for the 'up to current' gap, newest_message_ts will be None
|
||||
# and instead oldest_message_ts will be where we stopped.
|
||||
gap.newest_message_ts or gap.oldest_message_ts,
|
||||
parse_ts(gap.newest_message_ts or gap.oldest_message_ts),
|
||||
)
|
||||
# If we get here we must have gotten up to gap.oldest_message_ts, the gap
|
||||
# is now empty. If we're interrupted before we get here, the gap will stay
|
||||
# and be picked up from where we left off on the next run.
|
||||
gap.delete()
|
||||
|
||||
|
||||
def do_thread(thread: Thread, debug: bool):
|
||||
"""
|
||||
Download and process new messages in the specified thread.
|
||||
"""
|
||||
pages = thread_messages_newer(
|
||||
channel=thread.channel_id,
|
||||
thread_ts=thread.thread_ts,
|
||||
oldest=thread.last_update_ts,
|
||||
)
|
||||
for page in pages:
|
||||
with transaction.atomic():
|
||||
for message in page:
|
||||
if message["thread_ts"] == message["ts"]:
|
||||
# This is the parent message, it was already counted as a
|
||||
# channel message. Slack always returns the first message
|
||||
# even if it's older than the oldest we requested.
|
||||
if thread.last_update_ts is None:
|
||||
# However, still record that this thread was updated.
|
||||
# I think this will only will only matter if all
|
||||
# messages in the thread have been deleted.
|
||||
thread.last_update_ts = message["ts"]
|
||||
continue
|
||||
|
||||
# We never need to look at this message again. Oldest messages
|
||||
# come first unlike for channels.
|
||||
thread.last_update_ts = message["ts"]
|
||||
|
||||
if not should_track_message(message):
|
||||
continue
|
||||
|
||||
if debug:
|
||||
thread.channel.seenmessage_set.create(
|
||||
ts=message["ts"],
|
||||
thread=thread,
|
||||
)
|
||||
user = get_or_create_user(message["user"])
|
||||
SlackActivityBucket.track_activity(thread.channel, user, message["ts"])
|
||||
thread.save()
|
||||
|
||||
|
||||
def locked(fn):
|
||||
"""
|
||||
Runs the decorated function while holding a lock to prevent multiple
|
||||
concurrent instances.
|
||||
"""
|
||||
|
||||
@functools.wraps(fn)
|
||||
def inner(*args, **kwargs):
|
||||
cur = connection.cursor()
|
||||
ID = 1028307 # random number to identify this command
|
||||
cur.execute("SELECT pg_try_advisory_lock(%s);", [ID])
|
||||
(got_lock,) = cur.fetchone()
|
||||
if not got_lock:
|
||||
raise CommandError(
|
||||
"Could not obtain lock: "
|
||||
"another instance of this command must be running."
|
||||
)
|
||||
try:
|
||||
return fn(*args, **kwargs)
|
||||
finally:
|
||||
cur.execute("SELECT pg_advisory_unlock(%s);", [ID])
|
||||
|
||||
return inner
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument("channels", nargs=-1)
|
||||
@click.option(
|
||||
"--debug",
|
||||
is_flag=True,
|
||||
help=(
|
||||
"Store all messages seen to be able to "
|
||||
"detect bugs (uses lots of database space)."
|
||||
),
|
||||
)
|
||||
@locked
|
||||
def command(channels, debug):
|
||||
"""
|
||||
Download slack activity from channels the bot is a member of.
|
||||
|
||||
CHANNELS is an optional list of channel names (without the #) to limit to.
|
||||
If not provided, all channels the bot is a member of will be fetched.
|
||||
|
||||
This is resumable -- it can be interrupted and restarted without losing
|
||||
progress.
|
||||
|
||||
Do not run multiple instances of this command in parallel.
|
||||
"""
|
||||
|
||||
channels = set(channels)
|
||||
selected_channels = []
|
||||
if channels:
|
||||
for channel_data in get_my_channels():
|
||||
if channel_data["name"] in channels:
|
||||
selected_channels.append(channel_data)
|
||||
channels.remove(channel_data["name"])
|
||||
if channels:
|
||||
raise click.BadParameter(
|
||||
f"Could not find channels {channels} (maybe the bot isn't a member?)"
|
||||
)
|
||||
else:
|
||||
# materialize this generator so we can iterate multiple times
|
||||
selected_channels.extend(get_my_channels())
|
||||
|
||||
for channel_data in selected_channels:
|
||||
with transaction.atomic():
|
||||
channel, created = Channel.objects.update_or_create(
|
||||
id=channel_data["id"],
|
||||
defaults={
|
||||
"name": channel_data["name"],
|
||||
"topic": channel_data["topic"]["value"],
|
||||
"purpose": channel_data["purpose"]["value"],
|
||||
},
|
||||
)
|
||||
if created:
|
||||
# we don't have any messages for this channel we just created
|
||||
channel.channelupdategap_set.create(
|
||||
oldest_message_ts=None,
|
||||
newest_message_ts=None,
|
||||
)
|
||||
elif (
|
||||
channel.last_update_ts
|
||||
and not channel.channelupdategap_set.filter(
|
||||
newest_message_ts=None
|
||||
).exists()
|
||||
):
|
||||
# gap from the most recent fetch till now
|
||||
channel.channelupdategap_set.create(
|
||||
oldest_message_ts=channel.last_update_ts,
|
||||
newest_message_ts=None,
|
||||
)
|
||||
else:
|
||||
assert (
|
||||
channel.channelupdategap_set.exists()
|
||||
), "We must have SOME gaps, time has passed since the last run!"
|
||||
|
||||
gaps = ChannelUpdateGap.objects.filter(
|
||||
channel__id__in={c["id"] for c in selected_channels}
|
||||
)
|
||||
for gap in gaps:
|
||||
fill_channel_gap(gap, debug)
|
||||
|
||||
# We have to track threads we've seen and update independently, replies
|
||||
# don't show up in main channel history[1].
|
||||
#
|
||||
# [1]: <https://github.com/slackapi/python-slack-sdk/issues/1306>
|
||||
logger.info("Fetching threads")
|
||||
threads = Thread.objects.annotate(
|
||||
last_update_as_datetime=ToTimestamp(
|
||||
Cast("last_update_ts", output_field=FloatField())
|
||||
),
|
||||
).filter(
|
||||
# Assume threads not updated for more than 1 month won't get posted to
|
||||
# again. Otherwise it's too much work to check all threads ever.
|
||||
# last_update_ts will be null for the threads do_channel just created,
|
||||
# indicating they need to be updated at least once.
|
||||
Q(last_update_as_datetime=None)
|
||||
| Q(last_update_as_datetime__gte=Now() - datetime.timedelta(days=30)),
|
||||
channel_id__in={c["id"] for c in selected_channels},
|
||||
)
|
||||
for thread in threads:
|
||||
do_thread(thread, debug)
|
||||
186
slack/migrations/0001_initial.py
Normal file
186
slack/migrations/0001_initial.py
Normal file
@@ -0,0 +1,186 @@
|
||||
# Generated by Django 4.2.16 on 2024-11-12 17:15
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.db.models.functions.comparison
|
||||
import django.db.models.lookups
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = []
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name="Channel",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.CharField(max_length=16, primary_key=True, serialize=False),
|
||||
),
|
||||
("name", models.TextField()),
|
||||
("topic", models.TextField()),
|
||||
("purpose", models.TextField()),
|
||||
("last_update_ts", models.CharField(max_length=32, null=True)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="SlackUser",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.CharField(max_length=16, primary_key=True, serialize=False),
|
||||
),
|
||||
("name", models.TextField()),
|
||||
("real_name", models.TextField()),
|
||||
("email", models.TextField()),
|
||||
("image_48", models.URLField()),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="Thread",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("thread_ts", models.CharField(max_length=32)),
|
||||
("last_update_ts", models.CharField(max_length=32, null=True)),
|
||||
("db_created_at", models.DateTimeField(auto_now_add=True)),
|
||||
(
|
||||
"channel",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="slack.channel"
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="SlackActivityBucket",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("day", models.DateField()),
|
||||
("count", models.PositiveIntegerField()),
|
||||
(
|
||||
"channel",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="slack.channel"
|
||||
),
|
||||
),
|
||||
(
|
||||
"user",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
to="slack.slackuser",
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="SeenMessage",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("ts", models.CharField(max_length=32)),
|
||||
("db_created_at", models.DateTimeField(auto_now_add=True)),
|
||||
(
|
||||
"channel",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="slack.channel"
|
||||
),
|
||||
),
|
||||
(
|
||||
"thread",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
to="slack.thread",
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="ChannelUpdateGap",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("oldest_message_ts", models.CharField(max_length=32, null=True)),
|
||||
("newest_message_ts", models.CharField(max_length=32, null=True)),
|
||||
(
|
||||
"channel",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="slack.channel"
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name="thread",
|
||||
constraint=models.CheckConstraint(
|
||||
check=django.db.models.lookups.GreaterThanOrEqual(
|
||||
django.db.models.functions.comparison.Cast(
|
||||
"last_update_ts", output_field=models.FloatField()
|
||||
),
|
||||
django.db.models.functions.comparison.Cast(
|
||||
"thread_ts", output_field=models.FloatField()
|
||||
),
|
||||
),
|
||||
name="update_newer_than_created",
|
||||
),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="thread",
|
||||
unique_together={("channel", "thread_ts")},
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="slackactivitybucket",
|
||||
unique_together={("channel", "day", "user")},
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="seenmessage",
|
||||
unique_together={("channel", "ts")},
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name="channelupdategap",
|
||||
constraint=models.CheckConstraint(
|
||||
check=django.db.models.lookups.GreaterThan(
|
||||
django.db.models.functions.comparison.Cast(
|
||||
"newest_message_ts", output_field=models.FloatField()
|
||||
),
|
||||
django.db.models.functions.comparison.Cast(
|
||||
"oldest_message_ts", output_field=models.FloatField()
|
||||
),
|
||||
),
|
||||
name="newest_newer_than_oldest",
|
||||
),
|
||||
),
|
||||
]
|
||||
26
slack/migrations/0002_thread_last_update_as_datetime.py
Normal file
26
slack/migrations/0002_thread_last_update_as_datetime.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# Generated by Django 4.2.16 on 2024-11-13 19:05
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.functions.comparison
|
||||
import slack.models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("slack", "0001_initial"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddIndex(
|
||||
model_name="thread",
|
||||
index=models.Index(
|
||||
slack.models.ToTimestamp(
|
||||
django.db.models.functions.comparison.Cast(
|
||||
"last_update_ts", output_field=models.FloatField()
|
||||
)
|
||||
),
|
||||
name="last_update_as_datetime",
|
||||
),
|
||||
),
|
||||
]
|
||||
0
slack/migrations/__init__.py
Normal file
0
slack/migrations/__init__.py
Normal file
162
slack/models.py
Normal file
162
slack/models.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import datetime
|
||||
|
||||
from django.db import models, connection
|
||||
from django.db.models.expressions import Func
|
||||
from django.db.models.functions import Cast
|
||||
|
||||
"""
|
||||
`ts` fields from slack are actually IDs that can be interpreted into
|
||||
timestamps. We store them in a string as we get them from slack instead
|
||||
of a DateTimeField to be able to pass them back exactly to slack without
|
||||
precision or something causing round-tripping errors.
|
||||
"""
|
||||
|
||||
|
||||
def parse_ts(ts):
|
||||
seconds = float(ts)
|
||||
return datetime.datetime.fromtimestamp(seconds, tz=datetime.timezone.utc)
|
||||
|
||||
|
||||
class ToTimestamp(Func):
|
||||
"""
|
||||
Implements the postgres to_timestamp(double precision) function.
|
||||
|
||||
https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE
|
||||
"""
|
||||
|
||||
function = "to_timestamp"
|
||||
output_field = models.DateTimeField()
|
||||
|
||||
|
||||
class SlackUser(models.Model):
|
||||
id = models.CharField(max_length=16, primary_key=True)
|
||||
name = models.TextField()
|
||||
real_name = models.TextField()
|
||||
email = models.TextField()
|
||||
image_48 = models.URLField()
|
||||
|
||||
|
||||
class Channel(models.Model):
|
||||
id = models.CharField(max_length=16, primary_key=True)
|
||||
name = models.TextField()
|
||||
topic = models.TextField()
|
||||
purpose = models.TextField()
|
||||
# We only need to check for new messages strictly newer than this.
|
||||
last_update_ts = models.CharField(max_length=32, null=True)
|
||||
|
||||
|
||||
class ChannelUpdateGap(models.Model):
|
||||
"""
|
||||
Stores a (potentially unbounded) period of time where we haven't counted
|
||||
messages for a channel. Used to store progress and allow resumption.
|
||||
"""
|
||||
|
||||
channel = models.ForeignKey(Channel, on_delete=models.CASCADE)
|
||||
# There may be more messages to fetch that are strictly newer than
|
||||
# oldest_message_ts (null meaning no limit)
|
||||
oldest_message_ts = models.CharField(max_length=32, null=True)
|
||||
# but strictly older than newest_message_ts (null meaning no limit)
|
||||
newest_message_ts = models.CharField(max_length=32, null=True)
|
||||
|
||||
class Meta:
|
||||
constraints = [
|
||||
models.CheckConstraint(
|
||||
check=models.lookups.GreaterThan(
|
||||
models.functions.Cast(
|
||||
"newest_message_ts", output_field=models.FloatField()
|
||||
),
|
||||
models.functions.Cast(
|
||||
"oldest_message_ts", output_field=models.FloatField()
|
||||
),
|
||||
),
|
||||
name="newest_newer_than_oldest",
|
||||
)
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
return f"({self.oldest_message_ts}, {self.newest_message_ts})"
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ChannelUpdateGap: {self} ({self.id})>"
|
||||
|
||||
|
||||
class SeenMessage(models.Model):
|
||||
"""
|
||||
DEBUG ONLY: Store all seen messages to double-check we don't see them
|
||||
twice.
|
||||
|
||||
Invariant:
|
||||
|
||||
SeenMessage.objects.count() == \
|
||||
SlackActivityBucket.objects.aggregate(sum=Sum('count'))['sum']
|
||||
"""
|
||||
|
||||
channel = models.ForeignKey(Channel, on_delete=models.CASCADE)
|
||||
ts = models.CharField(max_length=32)
|
||||
thread = models.ForeignKey("Thread", on_delete=models.CASCADE, null=True)
|
||||
db_created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
unique_together = [("channel", "ts")]
|
||||
|
||||
|
||||
class Thread(models.Model):
|
||||
channel = models.ForeignKey(Channel, on_delete=models.CASCADE)
|
||||
thread_ts = models.CharField(max_length=32)
|
||||
last_update_ts = models.CharField(max_length=32, null=True)
|
||||
db_created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
unique_together = [("channel", "thread_ts")]
|
||||
constraints = [
|
||||
models.CheckConstraint(
|
||||
check=models.lookups.GreaterThanOrEqual(
|
||||
models.functions.Cast(
|
||||
"last_update_ts", output_field=models.FloatField()
|
||||
),
|
||||
models.functions.Cast(
|
||||
"thread_ts", output_field=models.FloatField()
|
||||
),
|
||||
),
|
||||
name="update_newer_than_created",
|
||||
)
|
||||
]
|
||||
indexes = [
|
||||
models.Index(
|
||||
ToTimestamp(Cast("last_update_ts", output_field=models.FloatField())),
|
||||
name="last_update_as_datetime",
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
class SlackActivityBucket(models.Model):
|
||||
"""
|
||||
Message count per user per channel per UTC day.
|
||||
"""
|
||||
|
||||
day = models.DateField()
|
||||
user = models.ForeignKey(SlackUser, on_delete=models.CASCADE)
|
||||
channel = models.ForeignKey(Channel, on_delete=models.CASCADE)
|
||||
count = models.PositiveIntegerField()
|
||||
|
||||
class Meta:
|
||||
unique_together = [("channel", "day", "user")]
|
||||
|
||||
@classmethod
|
||||
def track_activity(self, channel, user, ts):
|
||||
day = parse_ts(ts).date()
|
||||
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO slack_slackactivitybucket (day, user_id, channel_id, count)
|
||||
VALUES (%(day)s, %(user_id)s, %(channel_id)s, 1)
|
||||
ON CONFLICT (day, user_id, channel_id)
|
||||
DO UPDATE SET count = slack_slackactivitybucket.count + 1
|
||||
""",
|
||||
{
|
||||
"user_id": user.id,
|
||||
"channel_id": channel.id,
|
||||
"day": day,
|
||||
},
|
||||
)
|
||||
8
slack/tasks.py
Normal file
8
slack/tasks.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from django.core.management import call_command
|
||||
|
||||
from config.celery import app
|
||||
|
||||
|
||||
@app.task
|
||||
def fetch_slack_activity():
|
||||
call_command("fetch_slack_activity")
|
||||
Reference in New Issue
Block a user