Link commit authors to users (#1709, #1711) (#1717)

This commit is contained in:
daveoconnor
2025-03-31 11:34:25 -07:00
committed by GitHub
parent 7e81d7eb61
commit 1344c85617
11 changed files with 199 additions and 12 deletions

View File

@@ -336,3 +336,14 @@ For this to work `SLACK_BOT_API` must be set in the `.env` file.
|----------------|--------|----------------------------------------------------------------------------------------------------------------------|
| `--start_date` | date | If passed, retrieves data from the start date supplied, d-m-y, default 20-11-1998 (the start of the data in mailman) |
| `--end_date` | date | If passed, retrieves data until the end date supplied, d-m-y, default today |
## `link_contributors_to_users`
**Purpose**: Links commit authors to users in the database by setting `user.github_username` for users where no `github_username` value has been set, by matching the commit author email address against a user's account email address.
**Example**
```bash
./manage.py link_contributors_to_users
```

View File

@@ -32,6 +32,7 @@ from .tasks import (
update_libraries,
update_library_version_documentation_urls_all_versions,
generate_release_report,
synchronize_commit_author_user_data,
)
@@ -39,7 +40,7 @@ from .tasks import (
class CommitAdmin(admin.ModelAdmin):
list_display = ["library_version", "sha", "author"]
autocomplete_fields = ["author", "library_version"]
list_filter = ["library_version__library"]
list_filter = ["library_version__library", "library_version__version"]
search_fields = ["sha", "author__name"]
change_list_template = "admin/commit_change_list.html"
@@ -92,6 +93,11 @@ class CommitAuthorAdmin(admin.ModelAdmin):
self.admin_site.admin_view(self.update_github_data),
name="commit_author_update_github_data",
),
path(
"synchronize_ca_user_data/",
self.admin_site.admin_view(self.synchronize_ca_user_data),
name="synchronize_ca_user_data",
),
]
return my_urls + urls
@@ -105,6 +111,14 @@ class CommitAuthorAdmin(admin.ModelAdmin):
)
return HttpResponseRedirect("../")
def synchronize_ca_user_data(self, request):
    """Admin view: queue the CommitAuthor/User synchronization task.

    Enqueues ``synchronize_commit_author_user_data`` via ``.delay()``, flashes
    a notice to the requesting admin, and redirects to the parent URL
    (``../``).
    """
    synchronize_commit_author_user_data.delay()
    notice = "Synchronizing CommitAuthor and User data"
    self.message_user(request, notice)
    return HttpResponseRedirect("../")
@admin.action(
description="Combine 2 or more authors into one. References will be updated."
)

View File

@@ -0,0 +1,25 @@
# Generated by Django 4.2.16 on 2025-03-25 00:20
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
# Adds the nullable CommitAuthor.user foreign key to the configured user model.
class Migration(migrations.Migration):
# Depends on the app providing the swappable AUTH_USER_MODEL and on the
# preceding `libraries` migration.
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("libraries", "0029_merge_20250211_1626"),
]
operations = [
migrations.AddField(
model_name="commitauthor",
name="user",
field=models.ForeignKey(
# null=True: the column may be empty for authors with no linked user.
null=True,
# SET_NULL: deleting the user clears this reference instead of deleting
# the CommitAuthor row.
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
),
),
]

View File

@@ -2,13 +2,14 @@ import re
from typing import Self
from urllib.parse import urlparse
from django.contrib.auth import get_user_model
from django.core.cache import caches
from django.db import models, transaction
from django.db.models import Sum
from django.utils.functional import cached_property
from django.utils.text import slugify
from django.db.models.functions import Upper
from config import settings
from core.custom_model_fields import NullableFileField
from core.markdown import process_md
from core.models import RenderedContent
@@ -48,6 +49,9 @@ class CommitAuthor(models.Model):
name = models.CharField(max_length=100)
avatar_url = models.URLField(null=True, max_length=100)
github_profile_url = models.URLField(null=True, max_length=100)
user = models.ForeignKey(
settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True
)
@property
def display_name(self):
@@ -59,11 +63,6 @@ class CommitAuthor(models.Model):
return self.user.display_name
return self.name
@property
def user(self):
User = get_user_model()
return User.get_user_by_github_url(self.github_profile_url)
def __str__(self):
return self.name
@@ -82,7 +81,7 @@ class CommitAuthor(models.Model):
self.avatar_url = other.avatar_url
if not self.github_profile_url:
self.github_profile_url = other.github_profile_url
self.save(update_fields=["avatar_url", "github_profile_url"])
self.save(update_fields=["avatar_url", "github_profile_url", "user_id"])
other.delete()
@transaction.atomic

View File

@@ -1,14 +1,16 @@
from celery import shared_task, chain
from django.core.management import call_command
import structlog
from config.celery import app
from django.conf import settings
from django.db.models import Q
from django.db.models import Q, Count
from core.boostrenderer import get_content_from_s3
from core.htmlhelper import get_library_documentation_urls
from libraries.forms import CreateReportForm, CreateReportFullForm
from libraries.github import LibraryUpdater
from libraries.models import Library, LibraryVersion
from libraries.models import Library, LibraryVersion, CommitAuthorEmail, CommitAuthor
from users.tasks import User
from versions.models import Version
from .constants import (
LIBRARY_DOCS_EXCEPTIONS,
@@ -256,3 +258,101 @@ def release_tasks(user_id=None, generate_report=False):
if generate_report:
command.append("--generate_report")
call_command(*command)
@app.task
def synchronize_commit_author_user_data():
    """Dispatch the CommitAuthor/User synchronization as a Celery chain.

    The chain runs these tasks in order:

    1. ``merge_commit_authors_by_github_url``
    2. ``update_users_githubs``
    3. ``update_commit_authors_users``

    Immutable signatures (``.si()``) are used so no step receives the previous
    step's return value as an argument.
    """
    logger.info("Starting synchronize_commit_author_user_data")
    chain(
        merge_commit_authors_by_github_url.si(),
        update_users_githubs.si(),
        update_commit_authors_users.si(),
    )()
    # Calling the chain only dispatches it; the individual steps may still be
    # pending or running, so do not report the synchronization as finished.
    logger.info("synchronize_commit_author_user_data chain dispatched.")
def _commit_author_merge_priority(author):
    """Sort key for duplicate CommitAuthors; the smallest key is kept as primary."""
    linked_user = author.user
    if linked_user is None:
        # Authors without a user are merged into any author that has one.
        return (2, 0.0, author.pk)
    if linked_user.last_login is None:
        # Linked, but the user has never logged in.
        return (1, 0.0, author.pk)
    # Negated timestamp so the most recent login sorts first; pk breaks ties.
    return (0, -linked_user.last_login.timestamp(), author.pk)


@shared_task
def merge_commit_authors_by_github_url():
    """Merge CommitAuthor rows that share the same ``github_profile_url``.

    For each duplicated URL one author is kept as the primary record and every
    other author with that URL is merged into it via
    ``CommitAuthor.merge_author``. The primary is chosen by, in order:

    1. authors with a linked user before authors without one;
    2. among linked authors, the user with the most recent ``last_login``;
    3. lowest primary key, as a deterministic tie-breaker.

    The candidates are fetched once and ranked in Python. This keeps the
    choice independent of how the database orders NULLs, and guarantees the
    primary never also appears among the rows being merged into it (which
    could happen when the head and tail came from two separate queries over a
    non-total ordering).
    """
    logger.info("merging commit authors by github url")
    duplicated_author_urls = (
        CommitAuthor.objects.values("github_profile_url")
        .annotate(count=Count("id"))
        .filter(github_profile_url__isnull=False, count__gt=1)
    )
    logger.info(f"Found {duplicated_author_urls.count()} {duplicated_author_urls=}")
    for d in duplicated_author_urls:
        # select_related avoids one extra query per author when the sort key
        # reads author.user.
        duplicate_authors = sorted(
            CommitAuthor.objects.filter(
                github_profile_url=d["github_profile_url"]
            ).select_related("user"),
            key=_commit_author_merge_priority,
        )
        logger.debug(f"{duplicate_authors=}")
        if len(duplicate_authors) < 2:
            # Rows may have changed since the duplicate count was computed.
            continue
        primary, *others = duplicate_authors
        for da in others:
            logger.debug(f"{primary.id} {primary=} will have {da=} merged into it")
            primary.merge_author(da)
            logger.info(f"{primary.id} {primary=} has had {da.id=} merged into it")
    logger.info("merged commit authors by github url")
@shared_task
def update_users_githubs():
    """Attempt a GitHub-username link for every user with an empty ``github_username``.

    Each candidate is handled by calling ``update_user_github_username``
    directly, in this task's own process.
    """
    logger.info("Linking contributors to users")
    users_without_github = User.objects.filter(github_username="")
    for candidate in users_without_github:
        logger.info(f"Linking attempt: {candidate.email}")
        update_user_github_username(candidate.pk)
@shared_task
def update_user_github_username(user_id: int):
    """Set ``user.github_username`` from the CommitAuthor owning the user's email.

    Looks up the ``CommitAuthorEmail`` whose ``email`` equals the user's
    account email and takes the last path segment of that author's
    ``github_profile_url`` as the username. Returns without changes when no
    matching email exists or the author has no profile URL.

    Args:
        user_id: Primary key of the user to update.

    Raises:
        User.DoesNotExist: If no user has the given primary key.
    """
    logger.debug(f"Updating user github_username for {user_id=}")
    user = User.objects.get(pk=user_id)
    try:
        # select_related follows the forward FK in the same query;
        # prefetch_related would issue a second query for one row.
        # NOTE(review): .get() assumes at most one CommitAuthorEmail per
        # address — confirm the email column is unique.
        email = CommitAuthorEmail.objects.select_related("author").get(
            email=user.email
        )
    except CommitAuthorEmail.DoesNotExist:
        logger.info(f"No commit author email found for {user.pk=} {user.email=}")
        return
    commit_author = email.author
    logger.debug(f"Found {user.pk=} for {commit_author=}")
    if not commit_author.github_profile_url:
        logger.info(f"No github username found on {commit_author.pk=}")
        return
    # The username is the final path segment of the profile URL; a trailing
    # slash is stripped first so the split does not yield an empty string.
    github_username = commit_author.github_profile_url.rstrip("/").split("/")[-1]
    logger.debug(f"Updating {user.pk=} from {email.author.pk=}, {github_username=}")
    user.github_username = github_username
    # Write only the changed column so this batch job cannot overwrite other
    # user fields with stale values.
    user.save(update_fields=["github_username"])
    logger.info(f"Linked {user.pk=} to {commit_author.pk=} by github_username")
@shared_task
def update_commit_authors_users():
    """Attempt a user link for every CommitAuthor that has no user set.

    Each author is handled by calling ``update_commit_author_user`` directly,
    in this task's own process.
    """
    logger.info("Linking commit authors to users")
    unlinked_authors = CommitAuthor.objects.filter(user__isnull=True)
    for commit_author in unlinked_authors:
        logger.info(f"Linking attempt: {commit_author=}")
        update_commit_author_user(commit_author.pk)
    logger.info("Finished linking commit authors to users.")
@shared_task
def update_commit_author_user(author_id: int):
    """Link a CommitAuthor to the user whose account email matches one of its emails.

    Every ``CommitAuthorEmail`` of the author is checked against
    ``User.email``. Each match assigns that user to the author and saves it;
    the loop does not stop at the first match, so if several emails match
    different users, the last one processed wins.

    Args:
        author_id: Primary key of the CommitAuthor to link.
    """
    logger.info(f"{author_id=}")
    # select_related loads the author through the forward FK in the same
    # query, instead of the extra query prefetch_related would issue.
    commit_author_emails = CommitAuthorEmail.objects.select_related("author").filter(
        author_id=author_id
    )
    if not commit_author_emails:
        logger.info(f"No emails found for {author_id=}")
        return
    for email in commit_author_emails:
        user = User.objects.filter(email=email.email).first()
        if not user:
            logger.info(f"No user found for {email.pk=} {email.email=}")
            continue
        email.author.user = user
        # Write only the user link so other author columns are not rewritten
        # with values loaded earlier in this batch job.
        email.author.save(update_fields=["user"])
        logger.info(f"Linked {user=} {user.pk=} to {email=} {email.author.pk=}")

View File

@@ -1,3 +1,3 @@
-c requirements.txt
django-debug-toolbar
pydevd-pycharm==243.22562.180 # pinned to appropriate version for current pycharm
pydevd-pycharm==243.26053.29 # pinned to appropriate version for current pycharm

View File

@@ -143,4 +143,4 @@ npm install
npm run build
echo "Run: 'docker compose up -d' to restart your services"
echo "If you get an error related to profile images when loading the site, clear all cookies and try again"
echo "You should now able to log into the admin interface with username: 'superadmin' and password: 'foobarone'"
echo "You should now able to log into the admin interface with username: 'superadmin@boost.org' and password: 'foobarone'"

View File

@@ -6,6 +6,7 @@
{% block object-tools-items %}
{{ block.super }}
<li><a href="{% url 'admin:commit_author_update_github_data' %}" class="addlink">{% trans "Update Github Avatar and URL" %}</a></li>
<li><a href="{% url 'admin:synchronize_ca_user_data' %}" class="addlink">{% trans "Synchronize Data" %}</a></li>
{% endblock %}
</ul>
{% endblock %}

View File

@@ -127,6 +127,19 @@
</div>
</div>
{% endif %}
<div class="rounded bg-white dark:bg-charcoal p-4">
<h3>{% trans 'Commit Email Addresses' %}</h3>
<div>This is a list of email addresses associated with your profile that have been used in commits.</div>
<ul class="mt-4 list-disc">
{% for email in commit_email_addresses %}
<li class="ml-4">{{ email }}</li>
{% endfor %}
</ul>
<div class="mt-4">
<a href=""><button class="py-2 px-3 text-sm text-white rounded bg-orange">{% trans 'Add Commit Email Address' %}</button></a>
</div>
</div>
<div class="rounded bg-white dark:bg-charcoal p-4">
<h3>{% trans 'Delete Account' %}</h3>
{% if user.delete_permanently_at %}

View File

@@ -0,0 +1,17 @@
import djclick as click
from libraries.tasks import synchronize_commit_author_user_data
# Management command entry point (djclick); no options are declared.
# NOTE(review): click presumably uses the docstring below as the command's
# help text, so treat it as user-facing output — confirm before rewording.
@click.command()
def command():
"""
Link contributors to users by either email or github username from the commitauthor
records in the database.
The referencing in the app is done through the github_username field from the
CommitAuthor instance to the User model so we set that value.
"""
click.secho("Linking contributors to users", fg="blue")
# .delay() hands the task to Celery; this command returns once the task is
# queued, not when the synchronization has completed.
synchronize_commit_author_user_data.delay()
click.secho("Linking contributors to users has been queued.", fg="green")

View File

@@ -22,6 +22,7 @@ from rest_framework import generics
from rest_framework import viewsets
from rest_framework.permissions import IsAuthenticated, AllowAny
from libraries.models import CommitAuthorEmail
from .forms import (
PreferencesForm,
UserProfileForm,
@@ -99,6 +100,7 @@ class CurrentUserProfileView(LoginRequiredMixin, SuccessMessageMixin, TemplateVi
instance=self.request.user.preferences
)
context["social_accounts"] = self.get_social_accounts()
context["commit_email_addresses"] = self.get_commit_author_email_addresses()
return context
def get_social_accounts(self):
@@ -114,6 +116,11 @@ class CurrentUserProfileView(LoginRequiredMixin, SuccessMessageMixin, TemplateVi
)
return account_data
def get_commit_author_email_addresses(self):
    """Return commit email addresses of CommitAuthors linked to the current user.

    The result is a flat ``values_list`` queryset of ``email`` values.
    """
    linked_emails = CommitAuthorEmail.objects.filter(author__user=self.request.user)
    return linked_emails.values_list("email", flat=True)
def post(self, request, *args, **kwargs):
"""
Process each form submission individually if present