From 08dc7be4a19fb1f37a6cda95144ac71ddb253f66 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 16 Mar 2023 10:32:55 -0700 Subject: [PATCH 1/3] :construction: Stub methods for creating sample data --- .../management/commands/create_sample_data.py | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 libraries/management/commands/create_sample_data.py diff --git a/libraries/management/commands/create_sample_data.py b/libraries/management/commands/create_sample_data.py new file mode 100644 index 00000000..29a1ea8e --- /dev/null +++ b/libraries/management/commands/create_sample_data.py @@ -0,0 +1,138 @@ +import djclick as click +from faker import Faker + +from django.contrib.auth import get_user_model + +from libraries.models import Library, LibraryVersion, Category, PullRequest, Issue +from versions.models import Version + + +fake = Faker() +User = get_user_model() + +USER_COUNT = 100 + + +@click.command() +@click.option('--all', is_flag=True) +@click.option('--drop', is_flag=True) +@click.option('--users', is_flag=True) +@click.option('--versions', is_flag=True) +@click.option('--categories', is_flag=True) +@click.option('--libraries', is_flag=True) +@click.option('--library_versions', is_flag=True) +@click.option('--authors', is_flag=True) +@click.option('--maintainers', is_flag=True) +@click.option('--prs', is_flag=True) +@click.option('--issues', is_flag=True) +def command(all, drop, users, versions, libraries, library_versions, authors, maintainers, prs, issues, categories): + """ + Populate the database with fake data for local development. + + --all: If True, run all methods including the drop command. + --drop: If True, drop all records in the database. + --users: If True, create fake users. + --versions: If True, create fake versions. + --categories: If True, create fake categories. + --libraries: If True, create fake libraries and assign them categories. 
+ --library_versions: If True or if both --libraries and --versions are True, create fake library versions. + --authors: If True, add fake library authors. + --maintainers: If True, add fake library version maintainers. + --prs: If True, add fake library pull requests. + --issues: If True, add fake library issues. + """ + + if all: + call_all() + return + + if drop: + drop_all_records() + + if users: + create_users(USER_COUNT) + + if versions: + create_versions(10) + + if categories: + create_categories(20) + + if libraries: + create_libraries(50) + assign_library_categories() + + if library_versions: + create_library_versions() + else: + if libraries and versions: + create_library_versions() + + if authors: + create_authors() + + if maintainers: + create_maintainers() + + if prs: + create_pull_requests(10) + + if issues: + create_issues(10) + + +def call_all(): + drop_all_records() + create_users(USER_COUNT) + create_versions(10) + create_categories(20) + create_libraries(50) + create_library_versions() + create_authors() + create_maintainers() + create_pull_requests(10) + create_issues(10) + + +def create_users(count): + click.echo("Creating users...") + + +def create_versions(count): + click.echo("Creating versions...") + + +def create_libraries(count): + click.echo("Creating libraries...") + + +def assign_library_categories(): + click.secho("Assigning categories to libraries...") + + +def create_library_versions(): + click.echo("Creating library versions...") + + +def create_authors(): + click.echo("Adding library authors...") + + +def create_maintainers(): + click.echo("Adding library version maintainers...") + + +def create_pull_requests(count): + click.echo("Adding library pull requests...") + + +def create_issues(count): + click.echo("Adding library issues...") + + +def create_categories(count): + click.echo("Adding categories...") + + +def drop_all_records(): + click.echo("Dropping all records...") From c988c8cee4792504957615940130fc3bb90800b6 Mon Sep 17 
00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 16 Mar 2023 12:25:53 -0700 Subject: [PATCH 2/3] :sparkles: Add command to create sample data Does not include forum information, blog information. --- .../management/commands/create_sample_data.py | 307 +++++++++++++++--- 1 file changed, 270 insertions(+), 37 deletions(-) diff --git a/libraries/management/commands/create_sample_data.py b/libraries/management/commands/create_sample_data.py index 29a1ea8e..1d82c33a 100644 --- a/libraries/management/commands/create_sample_data.py +++ b/libraries/management/commands/create_sample_data.py @@ -1,7 +1,12 @@ import djclick as click +from datetime import timedelta from faker import Faker +from itertools import cycle +from model_bakery import baker +from random import randint, choice from django.contrib.auth import get_user_model +from django.utils import timezone from libraries.models import Library, LibraryVersion, Category, PullRequest, Issue from versions.models import Version @@ -12,20 +17,80 @@ User = get_user_model() USER_COUNT = 100 +BOOST_CATEGORIES = [ + "Algorithms", + "Assertions", + "Build", + "Collections", + "Concept Checking", + "Concurrency", + "Config", + "Conversion", + "Coroutines", + "DLL", +] + + +BOOST_LIBRARIES = [ + "algorithm", + "asio", + "assign", + "circular_buffer", + "date_time", + "filesystem", + "graph", + "iostreams", + "lexical_cast", + "math", + "program_options", + "regex", + "serialization", + "signals2", + "system", + "thread", + "uuid", +] + + +BOOST_VERSIONS = [ + "1.81.0", + "1.80.0", + "1.79.1", + "1.79.0", + "1.78.2", + "1.78.1", + "1.78.0", + "1.77.1", + "1.77.0", + "1.76.1", +] + @click.command() -@click.option('--all', is_flag=True) -@click.option('--drop', is_flag=True) -@click.option('--users', is_flag=True) -@click.option('--versions', is_flag=True) -@click.option('--categories', is_flag=True) -@click.option('--libraries', is_flag=True) -@click.option('--library_versions', is_flag=True) -@click.option('--authors', 
is_flag=True) -@click.option('--maintainers', is_flag=True) -@click.option('--prs', is_flag=True) -@click.option('--issues', is_flag=True) -def command(all, drop, users, versions, libraries, library_versions, authors, maintainers, prs, issues, categories): +@click.option("--all", is_flag=True) +@click.option("--drop", is_flag=True) +@click.option("--users", is_flag=True) +@click.option("--versions", is_flag=True) +@click.option("--categories", is_flag=True) +@click.option("--libraries", is_flag=True) +@click.option("--library_versions", is_flag=True) +@click.option("--authors", is_flag=True) +@click.option("--maintainers", is_flag=True) +@click.option("--prs", is_flag=True) +@click.option("--issues", is_flag=True) +def command( + all, + drop, + users, + versions, + libraries, + library_versions, + authors, + maintainers, + prs, + issues, + categories, +): """ Populate the database with fake data for local development. @@ -53,13 +118,13 @@ def command(all, drop, users, versions, libraries, library_versions, authors, ma create_users(USER_COUNT) if versions: - create_versions(10) + create_versions() if categories: - create_categories(20) + create_categories() if libraries: - create_libraries(50) + create_libraries() assign_library_categories() if library_versions: @@ -75,64 +140,232 @@ def command(all, drop, users, versions, libraries, library_versions, authors, ma create_maintainers() if prs: - create_pull_requests(10) + create_pull_requests() if issues: - create_issues(10) + create_issues() def call_all(): drop_all_records() create_users(USER_COUNT) - create_versions(10) - create_categories(20) - create_libraries(50) + create_versions() + create_categories() + create_libraries() + assign_library_categories() create_library_versions() create_authors() create_maintainers() - create_pull_requests(10) - create_issues(10) + create_pull_requests() + create_issues() def create_users(count): - click.echo("Creating users...") + """Creates fake users""" + 
click.secho("Creating users...") + + first_names = [fake.first_name() for i in range(1, count)] + last_names = [fake.last_name() for i in range(1, count)] + + objects = baker.make( + User, + _quantity=count, + first_name=cycle(first_names), + last_name=cycle(last_names), + ) + click.secho(f"...Created {len(objects)} users", fg="green") + return objects -def create_versions(count): - click.echo("Creating versions...") +def create_versions(): + """ + Creates fake versions using the names in BOOST_VERSIONS, and sets + their release dates at every 180 days + """ + click.secho("Creating versions...") + release_dates = get_dates() + objects = baker.make( + Version, + _quantity=len(BOOST_VERSIONS), + name=cycle(BOOST_VERSIONS), + release_date=cycle(release_dates), + ) + click.secho(f"...Created {len(objects)} versions", fg="green") + return objects -def create_libraries(count): - click.echo("Creating libraries...") +def create_libraries(): + """Creates fake libraries using the names in BOOST_LIBRARIES""" + click.secho("Creating libraries...") + objects = baker.make( + Library, _quantity=len(BOOST_LIBRARIES), name=cycle(BOOST_LIBRARIES) + ) + click.secho(f"...Created {len(objects)} versions", fg="green") + return objects def assign_library_categories(): + """Assigns 1-3 categories to each library""" click.secho("Assigning categories to libraries...") + for library in Library.objects.all(): + if library.categories.count() > 1: + continue + + count = randint(1, 3) + categories = Category.objects.order_by("?")[:count] + for category in categories: + library.categories.add(category) + click.secho(f"...{library} assigned the {category} category", fg="green") def create_library_versions(): - click.echo("Creating library versions...") + """Assigns a random number of versions to each library""" + click.secho("Creating library versions...") + for library in Library.objects.all(): + start_version = Version.objects.order_by("?").first() + for version in Version.objects.filter( + 
release_date__gt=start_version.release_date + ): + lib_version, created = LibraryVersion.objects.get_or_create( + library=library, version=version + ) + click.secho(f"...{lib_version} created", fg="green") def create_authors(): - click.echo("Adding library authors...") + """Assigns 1-3 authors to each library""" + click.secho("Adding library authors...") + for library in Library.objects.all(): + count = randint(1, 3) + authors = User.objects.filter(is_superuser=False).order_by("?")[:count] + for author in authors: + library.authors.add(author) + click.secho(f"...{author} assigned as {library} author", fg="green") def create_maintainers(): - click.echo("Adding library version maintainers...") + """Assigns 1-3 maintainers to each Library for the most recent version only""" + click.secho("Adding library version maintainers...") + version = Version.objects.most_recent() + for library in Library.objects.all(): + try: + library_version = LibraryVersion.objects.get( + version=version, library=library + ) + except LibraryVersion.DoesNotExist: + continue + + count = randint(1, 3) + maintainers = User.objects.filter(is_superuser=False).order_by("?")[:count] + for maintainer in maintainers: + library_version.maintainers.add(maintainer) + click.secho( + f"...{maintainer} assigned as {library_version} maintainer", fg="green" + ) -def create_pull_requests(count): - click.echo("Adding library pull requests...") +def create_pull_requests(): + """Creates 5-10 PRs for each library""" + click.secho("Adding library pull requests...") + for library in Library.objects.all(): + count = randint(5, 10) + titles = [ + fake.sentence(nb_words=4, variable_nb_words=True, ext_word_list=None) + for i in range(1, count) + ] + dates = [get_random_date() for i in range(1, count)] + numbers = [randint(5000, 9999) for i in range(1, count)] + is_open = [choice([True, False]) for i in range(1, count)] + objects = baker.make( + PullRequest, + _quantity=count, + library=library, + title=cycle(titles), + 
is_open=cycle(is_open), + created=cycle(dates), + number=cycle(numbers), + ) + click.secho( + f"...{len(objects)} pull requests created for {library}", fg="green" + ) -def create_issues(count): - click.echo("Adding library issues...") +def create_issues(): + """Creates 5-10 PRs for each library""" + click.secho("Adding library issues...") + for library in Library.objects.all(): + count = randint(5, 10) + titles = [ + fake.sentence(nb_words=4, variable_nb_words=True, ext_word_list=None) + for i in range(1, count) + ] + dates = [get_random_date() for i in range(1, count)] + numbers = [randint(5000, 9999) for i in range(1, count)] + is_open = [choice([True, False]) for i in range(1, count)] + objects = baker.make( + Issue, + _quantity=count, + library=library, + title=cycle(titles), + is_open=cycle(is_open), + created=cycle(dates), + number=cycle(numbers), + ) + click.secho(f"...{len(objects)} issues created for {library}", fg="green") -def create_categories(count): - click.echo("Adding categories...") +def create_categories(): + """Create categories using BOOST_CATEGORIES""" + objects = baker.make( + Category, _quantity=len(BOOST_CATEGORIES), name=cycle(BOOST_CATEGORIES) + ) + click.secho(f"...Created {len(objects)} categories", fg="green") + return objects def drop_all_records(): - click.echo("Dropping all records...") + """Drop every table""" + click.secho("Dropping all records...", fg="red") + + click.secho("Dropping Non-Superusers...", fg="red") + User.objects.filter(is_superuser=False).delete() + + click.secho("Dropping LibraryVersions...", fg="red") + LibraryVersion.objects.all().delete() + + click.secho("Dropping Versions...", fg="red") + Version.objects.all().delete() + + click.secho("Dropping Categories...", fg="red") + Category.objects.all().delete() + + click.secho("Dropping PullRequests...", fg="red") + PullRequest.objects.all().delete() + + click.secho("Dropping Issues...", fg="red") + Issue.objects.all().delete() + + click.secho("Dropping Libraries...", 
fg="red") + Library.objects.all().delete() + + +def get_dates(count=0): + """ + Returns a list of dates, in descending order, starting with today and + decrementing by 180 days + """ + if not count: + count = len(BOOST_VERSIONS) + + dates = [] + for i in range(count): + date = timezone.now().date() - timedelta(days=180 * i) + dates.append(date) + + return dates + + +def get_random_date(): + """Returns a date within the last 5 years""" + start_date = timezone.now() - timedelta(days=365 * 5) + return fake.date_between(start_date=start_date, end_date="today") From f33a9e76655a74ef162b1c3c352e1bb6ba7ae247 Mon Sep 17 00:00:00 2001 From: Lacey Williams Henschel Date: Thu, 16 Mar 2023 12:59:35 -0700 Subject: [PATCH 3/3] :books: Start some management command docs --- docs/commands.md | 111 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 docs/commands.md diff --git a/docs/commands.md b/docs/commands.md new file mode 100644 index 00000000..f6703ad1 --- /dev/null +++ b/docs/commands.md @@ -0,0 +1,111 @@ +# Management Commands + +## `create_sample_data` + +Running this command will populate the database with fake data for local development. + +When run, it will create fake objects for these models: + +- User +- Version +- Category +- Library +- LibraryVersion +- Authors for Libraries and Maintainers for LibraryVersions +- Issues and Pull Requests for Libraries + +The data generated is fake. Any links, information that looks like it comes from GitHub, email addresses, etc. is all fake. Some of it is made to look like realistic data. + +The following options can be used with the command: + +- `--all`: If True, run all methods including the drop command. + +If you don't want to drop all records for the above models and create a new set of fresh data, you can pass these options to clear your database and/or create new records. + +- `--drop`: If True, drop all records in the database. +- `--users`: If True, create fake users. 
+- `--versions`: If True, create fake versions. +- `--categories`: If True, create fake categories. +- `--libraries`: If True, create fake libraries and assign them categories. +- `--library_versions`: If True or if both `--libraries` and `--versions` are True, create fake library versions. +- `--authors`: If True, add fake library authors. +- `--maintainers`: If True, add fake library version maintainers. +- `--prs`: If True, add fake library pull requests. +- `--issues`: If True, add fake library issues. + +### Example: Drop your database and create a new set of data + + ./manage.py create_sample_data --all + +Output: + + Dropping all records... + Dropping Non-Superusers... + Dropping LibraryVersions... + Dropping Versions... + Dropping Categories... + Dropping PullRequests... + Dropping Issues... + Dropping Libraries... + Creating users... + ...Created 100 users + Creating versions... + ...Created 10 versions + ...Created 10 categories + Creating libraries... + ...Created 17 versions + Assigning categories to libraries... + ...algorithm assigned the Assertions category + Creating library versions... + ...algorithm (1.81.0) created + Adding library authors... + ...ghtkeoqjao@example.com assigned as algorithm author + Adding library version maintainers... + ...hpztdsynsa@example.com assigned as algorithm (1.81.0) maintainer + Adding library pull requests... + ...6 pull requests created for algorithm + Adding library issues... + ...10 issues created for algorithm + + +### Example: Create new pull requests and issues for existing library objects + + ./manage.py create_sample_data --prs --issues + +Output: + + Adding library pull requests... + ...9 pull requests created for algorithm + ...7 pull requests created for asio + Adding library issues... 
+ ...9 issues created for algorithm + ...10 issues created for asio + + +## `generate_fake_versions` + +Creates fake Version objects **only**, then creates LibraryVersion objects for each existing Library and the new Versions. + +### Example: + + ./manage.py generate_fake_versions + +Output: + + Version 1.30.0 created succcessfully + ---algorithm (1.30.0) created succcessfully + + +## `update_libraries` + +Runs the library update script, which cycles through the repos listed in the Boost library and syncs their information. + +Synced information: + +- Most library information comes from `meta/libraries.json` stored in each Boost library repo +- Library data and metadata from GitHub is saved to our database +- Categories are updated, if needed +- Library categories are updated, if need be. +- Issues and Pull Requests are synced + +**NOTE**: Can take upwards of a half hour to run. If you are trying to populate tables for local development, `create_sample_data` is a better option if the GitHub integrations aren't important.