diff --git a/docs/commands.md b/docs/commands.md new file mode 100644 index 00000000..f6703ad1 --- /dev/null +++ b/docs/commands.md @@ -0,0 +1,111 @@ +# Management Commands + +## `create_sample_data` + +Running this command will populate the database with fake data for local development. + +When run, it will create fake objects for these models: + +- User +- Version +- Category +- Library +- LibraryVersion +- Authors for Libraries and Maintainers for LibraryVersions +- Issues and Pull Requests for Libraries + +The data generated is fake. Any links, information that looks like it comes from GitHub, email addresses, etc. are all fake. Some of it is made to look like realistic data. + +The following options can be used with the command: + +- `--all`: If True, run all methods including the drop command. + +If you don't want to drop all records for the above models and create a fresh set of data, you can pass any of these options individually to clear your database and/or create new records. + +- `--drop`: If True, drop all records for the above models (superusers are kept). +- `--users`: If True, create fake users. +- `--versions`: If True, create fake versions. +- `--categories`: If True, create fake categories. +- `--libraries`: If True, create fake libraries and assign them categories. +- `--library_versions`: If True or if both `--libraries` and `--versions` are True, create fake library versions. +- `--authors`: If True, add fake library authors. +- `--maintainers`: If True, add fake library version maintainers. +- `--prs`: If True, add fake library pull requests. +- `--issues`: If True, add fake library issues. + +### Example: Drop your database and create a new set of data + + ./manage.py create_sample_data --all + +Output: + + Dropping all records... + Dropping Non-Superusers... + Dropping LibraryVersions... + Dropping Versions... + Dropping Categories... + Dropping PullRequests... + Dropping Issues... + Dropping Libraries... + Creating users... + ...Created 100 users + Creating versions... 
+ ...Created 10 versions + ...Created 10 categories + Creating libraries... + ...Created 17 versions + Assigning categories to libraries... + ...algorithm assigned the Assertions category + Creating library versions... + ...algorithm (1.81.0) created + Adding library authors... + ...ghtkeoqjao@example.com assigned as algorithm author + Adding library version maintainers... + ...hpztdsynsa@example.com assigned as algorithm (1.81.0) maintainer + Adding library pull requests... + ...6 pull requests created for algorithm + Adding library issues... + ...10 issues created for algorithm + + +### Example: Create new pull requests and issues for existing library objects + + ./manage.py create_sample_data --prs --issues + +Output: + + Adding library pull requests... + ...9 pull requests created for algorithm + ...7 pull requests created for asio + Adding library issues... + ...9 issues created for algorithm + ...10 issues created for asio + + +## `generate_fake_versions` + +Creates fake Version objects **only**, then creates LibraryVersion objects for each existing Library and the new Versions. + +### Example: + + ./manage.py generate_fake_versions + +Output: + + Version 1.30.0 created succcessfully + ---algorithm (1.30.0) created succcessfully + + +## `update_libraries` + +Runs the library update script, which cycles through the repos listed in the Boost library and syncs their information. + +Synced information: + +- Most library information comes from `meta/libraries.json` stored in each Boost library repo +- Library data and metadata from GitHub is saved to our database +- Categories are updated, if needed +- Library categories are updated, if needed +- Issues and Pull Requests are synced + +**NOTE**: This command can take upwards of half an hour to run. If you are trying to populate tables for local development, `create_sample_data` is a better option if the GitHub integrations aren't important. 
# libraries/management/commands/create_sample_data.py
"""Management command that fills the database with fake data for local development.

Everything generated here is fake: names, e-mail addresses, pull requests and
issues are produced by Faker / model_bakery and do not come from GitHub.
"""
from datetime import timedelta
from itertools import cycle
from random import choice, randint

import djclick as click
from django.contrib.auth import get_user_model
from django.utils import timezone
from faker import Faker
from model_bakery import baker

from libraries.models import Category, Issue, Library, LibraryVersion, PullRequest
from versions.models import Version


fake = Faker()
User = get_user_model()

# Number of fake users created by ``--users`` / ``--all``.
USER_COUNT = 100

BOOST_CATEGORIES = [
    "Algorithms",
    "Assertions",
    "Build",
    "Collections",
    "Concept Checking",
    "Concurrency",
    "Config",
    "Conversion",
    "Coroutines",
    "DLL",
]


BOOST_LIBRARIES = [
    "algorithm",
    "asio",
    "assign",
    "circular_buffer",
    "date_time",
    "filesystem",
    "graph",
    "iostreams",
    "lexical_cast",
    "math",
    "program_options",
    "regex",
    "serialization",
    "signals2",
    "system",
    "thread",
    "uuid",
]


# Listed newest first; ``get_dates`` returns dates newest first as well, so the
# first name is paired with today's date in ``create_versions``.
BOOST_VERSIONS = [
    "1.81.0",
    "1.80.0",
    "1.79.1",
    "1.79.0",
    "1.78.2",
    "1.78.1",
    "1.78.0",
    "1.77.1",
    "1.77.0",
    "1.76.1",
]


@click.command()
@click.option("--all", is_flag=True)
@click.option("--drop", is_flag=True)
@click.option("--users", is_flag=True)
@click.option("--versions", is_flag=True)
@click.option("--categories", is_flag=True)
@click.option("--libraries", is_flag=True)
@click.option("--library_versions", is_flag=True)
@click.option("--authors", is_flag=True)
@click.option("--maintainers", is_flag=True)
@click.option("--prs", is_flag=True)
@click.option("--issues", is_flag=True)
def command(
    all,  # shadows the builtin, but the name is dictated by the --all option
    drop,
    users,
    versions,
    libraries,
    library_versions,
    authors,
    maintainers,
    prs,
    issues,
    categories,
):
    """
    Populate the database with fake data for local development.

    --all: If True, run all methods including the drop command.
    --drop: If True, drop all records for the sample models (superusers are kept).
    --users: If True, create fake users.
    --versions: If True, create fake versions.
    --categories: If True, create fake categories.
    --libraries: If True, create fake libraries and assign them categories.
    --library_versions: If True or if both --libraries and --versions are True,
        create fake library versions.
    --authors: If True, add fake library authors.
    --maintainers: If True, add fake library version maintainers.
    --prs: If True, add fake library pull requests.
    --issues: If True, add fake library issues.
    """

    # --all overrides every other flag and runs the complete pipeline.
    if all:
        call_all()
        return

    # The steps below run in dependency order: later steps read the rows
    # that earlier steps create.
    if drop:
        drop_all_records()

    if users:
        create_users(USER_COUNT)

    if versions:
        create_versions()

    if categories:
        create_categories()

    if libraries:
        create_libraries()
        assign_library_categories()

    # Library versions are also (re)built whenever both of their inputs were
    # just regenerated.
    if library_versions or (libraries and versions):
        create_library_versions()

    if authors:
        create_authors()

    if maintainers:
        create_maintainers()

    if prs:
        create_pull_requests()

    if issues:
        create_issues()


def call_all():
    """Drop the existing sample records, then run every creation step in order."""
    drop_all_records()
    create_users(USER_COUNT)
    create_versions()
    create_categories()
    create_libraries()
    assign_library_categories()
    create_library_versions()
    create_authors()
    create_maintainers()
    create_pull_requests()
    create_issues()


def create_users(count):
    """Create ``count`` fake users with Faker-generated names and return them.

    Returns an empty list without touching the database when ``count`` is
    smaller than 1.
    """
    click.secho("Creating users...")
    if count < 1:
        click.secho("...Created 0 users", fg="green")
        return []

    # One name per user. The previous ``range(1, count)`` produced only
    # ``count - 1`` names, so the last user silently reused the first name.
    first_names = [fake.first_name() for _ in range(count)]
    last_names = [fake.last_name() for _ in range(count)]

    objects = baker.make(
        User,
        _quantity=count,
        first_name=cycle(first_names),
        last_name=cycle(last_names),
    )
    click.secho(f"...Created {len(objects)} users", fg="green")
    return objects


def create_versions():
    """
    Create one fake Version per name in BOOST_VERSIONS and return them.

    Release dates come from ``get_dates``: the first name gets today's date and
    each following name is dated 180 days earlier than the previous one.
    """
    click.secho("Creating versions...")
    release_dates = get_dates()
    objects = baker.make(
        Version,
        _quantity=len(BOOST_VERSIONS),
        name=cycle(BOOST_VERSIONS),
        release_date=cycle(release_dates),
    )
    click.secho(f"...Created {len(objects)} versions", fg="green")
    return objects


def create_libraries():
    """Create one fake Library per name in BOOST_LIBRARIES and return them."""
    click.secho("Creating libraries...")
    objects = baker.make(
        Library, _quantity=len(BOOST_LIBRARIES), name=cycle(BOOST_LIBRARIES)
    )
    # Fixed: this message used to say "versions".
    click.secho(f"...Created {len(objects)} libraries", fg="green")
    return objects


def assign_library_categories():
    """Add 1-3 randomly chosen categories to each library.

    Libraries that already have more than one category are skipped.
    """
    click.secho("Assigning categories to libraries...")
    for library in Library.objects.all():
        # NOTE(review): a library with exactly one category still receives
        # more; confirm whether ``>= 1`` was intended.
        if library.categories.count() > 1:
            continue

        count = randint(1, 3)
        categories = Category.objects.order_by("?")[:count]
        for category in categories:
            library.categories.add(category)
            click.secho(f"...{library} assigned the {category} category", fg="green")


def create_library_versions():
    """Link each library to a random run of the newest versions.

    For every library a random Version is picked and a LibraryVersion is
    created for each Version released after it. Does nothing (apart from a
    notice) when no Version exists.
    """
    click.secho("Creating library versions...")
    # Without this guard ``start_version`` below would be None and the
    # attribute access would raise AttributeError.
    if not Version.objects.exists():
        click.secho("...No versions found, nothing to create", fg="yellow")
        return

    for library in Library.objects.all():
        start_version = Version.objects.order_by("?").first()
        # NOTE(review): ``__gt`` excludes the picked version itself, so a
        # library gets no versions at all when the newest one is picked.
        # Confirm whether ``__gte`` was intended.
        newer_versions = Version.objects.filter(
            release_date__gt=start_version.release_date
        )
        for version in newer_versions:
            lib_version, created = LibraryVersion.objects.get_or_create(
                library=library, version=version
            )
            # Only report rows that were actually inserted.
            if created:
                click.secho(f"...{lib_version} created", fg="green")


def create_authors():
    """Add 1-3 randomly chosen non-superuser users as authors of each library."""
    click.secho("Adding library authors...")
    for library in Library.objects.all():
        count = randint(1, 3)
        authors = User.objects.filter(is_superuser=False).order_by("?")[:count]
        for author in authors:
            library.authors.add(author)
            click.secho(f"...{author} assigned as {library} author", fg="green")


def create_maintainers():
    """Add 1-3 maintainers to each library's most recent LibraryVersion.

    Libraries without a LibraryVersion for the most recent Version are skipped.
    """
    click.secho("Adding library version maintainers...")
    version = Version.objects.most_recent()
    for library in Library.objects.all():
        try:
            library_version = LibraryVersion.objects.get(
                version=version, library=library
            )
        except LibraryVersion.DoesNotExist:
            continue

        count = randint(1, 3)
        maintainers = User.objects.filter(is_superuser=False).order_by("?")[:count]
        for maintainer in maintainers:
            library_version.maintainers.add(maintainer)
            click.secho(
                f"...{maintainer} assigned as {library_version} maintainer", fg="green"
            )


def _make_fake_tickets(model, library, count):
    """Create ``count`` fake ``model`` rows (PullRequest or Issue) for ``library``."""
    # One value per object. The previous ``range(1, count)`` yielded only
    # ``count - 1`` values, so the last object duplicated the first one.
    titles = [
        fake.sentence(nb_words=4, variable_nb_words=True, ext_word_list=None)
        for _ in range(count)
    ]
    dates = [get_random_date() for _ in range(count)]
    numbers = [randint(5000, 9999) for _ in range(count)]
    is_open = [choice([True, False]) for _ in range(count)]
    return baker.make(
        model,
        _quantity=count,
        library=library,
        title=cycle(titles),
        is_open=cycle(is_open),
        created=cycle(dates),
        number=cycle(numbers),
    )


def create_pull_requests():
    """Create 5-10 fake pull requests for each library."""
    click.secho("Adding library pull requests...")
    for library in Library.objects.all():
        objects = _make_fake_tickets(PullRequest, library, randint(5, 10))
        click.secho(
            f"...{len(objects)} pull requests created for {library}", fg="green"
        )


def create_issues():
    """Create 5-10 fake issues for each library."""
    click.secho("Adding library issues...")
    for library in Library.objects.all():
        objects = _make_fake_tickets(Issue, library, randint(5, 10))
        click.secho(f"...{len(objects)} issues created for {library}", fg="green")


def create_categories():
    """Create one fake Category per name in BOOST_CATEGORIES and return them."""
    # Progress header added for consistency with the other create_* steps.
    click.secho("Creating categories...")
    objects = baker.make(
        Category, _quantity=len(BOOST_CATEGORIES), name=cycle(BOOST_CATEGORIES)
    )
    click.secho(f"...Created {len(objects)} categories", fg="green")
    return objects


def drop_all_records():
    """Delete every record of the sample-data models.

    Removes all non-superuser users and all LibraryVersion, Version, Category,
    PullRequest, Issue and Library rows. Superusers are kept.
    """
    click.secho("Dropping all records...", fg="red")

    click.secho("Dropping Non-Superusers...", fg="red")
    User.objects.filter(is_superuser=False).delete()

    click.secho("Dropping LibraryVersions...", fg="red")
    LibraryVersion.objects.all().delete()

    click.secho("Dropping Versions...", fg="red")
    Version.objects.all().delete()

    click.secho("Dropping Categories...", fg="red")
    Category.objects.all().delete()

    click.secho("Dropping PullRequests...", fg="red")
    PullRequest.objects.all().delete()

    click.secho("Dropping Issues...", fg="red")
    Issue.objects.all().delete()

    click.secho("Dropping Libraries...", fg="red")
    Library.objects.all().delete()


def get_dates(count=0):
    """
    Return ``count`` dates in descending order, starting with today and
    stepping back 180 days at a time.

    A falsy ``count`` (the default) means one date per entry in BOOST_VERSIONS.
    """
    if not count:
        count = len(BOOST_VERSIONS)

    # Read the clock once so every date is derived from the same "today".
    today = timezone.now().date()
    return [today - timedelta(days=180 * step) for step in range(count)]


def get_random_date():
    """Return a random date within the last 5 years (365-day years)."""
    start_date = timezone.now() - timedelta(days=365 * 5)
    return fake.date_between(start_date=start_date, end_date="today")