Local mailing list db import (#1844)

Sam Darwin
2025-07-15 14:19:40 -06:00
committed by GitHub
parent 0c4efe213d
commit b462936ebf
3 changed files with 129 additions and 22 deletions

View File

@@ -74,3 +74,7 @@ DEBUG_TOOLBAR=True
# for production database dumps on Google Cloud Storage
PROD_DB_DUMP_URL=gs://boostbackups/db1/daily/
PROD_DB_DUMP_FILE_WILDCARD=boost_production.db1*
PROD_LISTS_CORE_DB_DUMP_URL=gs://boostbackups/db1/daily/
PROD_LISTS_CORE_DB_DUMP_FILE_WILDCARD=lists_production_core.db1*
PROD_LISTS_WEB_DB_DUMP_URL=gs://boostbackups/db1/daily/
PROD_LISTS_WEB_DB_DUMP_FILE_WILDCARD=lists_production_web.db1*
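# A quick sanity check that these paths resolve (a sketch; assumes gcloud is installed and authenticated):
# gcloud storage ls "gs://boostbackups/db1/daily/lists_production_core.db1*"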

View File

@@ -15,9 +15,9 @@ prereqsoption="yes"
# docker_mode is either "native" or "desktop" (Docker Desktop). Only "native" is currently supported.
docker_mode="native"
# the 'just' install can't be run as root. Switch to 'standard_user' for that:
standard_user="ubuntu"
standard_user="${SHELL_USER:-ubuntu}"
# On Linux, there are two ways to run Docker: the standard native installation, or Docker Desktop, which runs inside a virtual machine. The most common installation is standard Docker, so that is what this script currently supports; Docker Desktop support could be added in the future. Each method has pros and cons. It's important that the user inside the Django containers is the same as the user on the host machine outside the containers, so that file ownership matches up. Since the user is 'root' inside the containers, it should be 'root' on the host machine. Therefore, any development work should be done as 'root': run 'sudo su -' before using docker-compose. Docker Desktop would remove that requirement and allow running as a regular user account, but with the downside that it is not the typical Linux Docker installation found on servers.
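# A quick check that the uids line up (a sketch; assumes the compose service is named 'web', as elsewhere in this repo):
#   id -u                          # on the host; as root this prints 0
#   docker compose exec web id -u  # inside the container; the two should match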
if [[ ${docker_mode} == "native" ]]; then
repo_path_base="/opt/github"
@@ -27,6 +27,12 @@ if [[ ${docker_mode} == "native" ]]; then
fi
completion_message_1="When doing development work, always switch to the root user, cd to that directory location, and run 'docker compose up -d'. You should be root when running docker compose."
shell_initialization_file=/root/.bashrc
if id "$standard_user" >/dev/null 2>&1; then
true
else
echo "The script needs to be informed about a standard non-root user for certain commands. Those can be discovered by listing 'ls -al /home'. Then please run 'export SHELL_USER=__'. Exiting."
exit 1
fi
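# e.g. if 'ls -al /home' shows a (hypothetical) user 'dev', run: export SHELL_USER=dev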
fi
if [[ ${docker_mode} == "desktop" ]]; then
repo_path_base="${HOME}/github"
@@ -156,14 +162,22 @@ fi
if [[ "$prereqsoption" == "yes" ]]; then
# sudo apt-get update
sudo apt-get update
if ! cargo --version &> /dev/null
then
echo "Installing cargo"
sudo apt-get install -y cargo
fi
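# Configure needrestart to restart services automatically ('a') instead of prompting,
# so the apt installs below don't block. needrestart conf.d files are Perl snippets.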
x="\$nrconf{restart} = 'a';"
mkdir -p /etc/needrestart/conf.d
echo "$x" | sudo tee /etc/needrestart/conf.d/90-autorestart.conf 1>/dev/null
if ! command -v makedeb &> /dev/null
then
echo "Installing makdeb"
MAKEDEB_RELEASE=makedeb bash -ci "$(wget -qO - 'https://shlink.makedeb.org/install')"
su - "${standard_user}" -c "export MAKEDEB_RELEASE=makedeb && bash -ci $(wget -qO - 'https://shlink.makedeb.org/install')"
# Or, an alternate method:
# wget -qO - 'https://proget.makedeb.org/debian-feeds/makedeb.pub' | gpg --dearmor | sudo tee /usr/share/keyrings/makedeb-archive-keyring.gpg 1> /dev/null
# echo 'deb [signed-by=/usr/share/keyrings/makedeb-archive-keyring.gpg arch=all] https://proget.makedeb.org/ makedeb main' | sudo tee /etc/apt/sources.list.d/makedeb.list
@@ -176,6 +190,19 @@ if [[ "$prereqsoption" == "yes" ]]; then
sudo apt-get install -y git
fi
if ! command -v psql &> /dev/null
then
echo "Installing postgresql-client"
sudo apt-get install -y postgresql-client-16
fi
if ! command -v gcloud &> /dev/null
then
echo "Installing gcloud"
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
apt-get update && apt-get install -y google-cloud-cli
fi
if ! command -v python3 &> /dev/null
then
echo "Installing python3"

View File

@@ -1,6 +1,50 @@
#!/bin/bash
set -eu
# Import Production Data Locally
# Instructions
#
# 1. Install prerequisites (Docker, Just, etc.), either manually or by using ./scripts/dev-bootstrap-macos.sh
# 2. Run this script with --help to see options. ./scripts/load_production_data.sh --help
#
#
# READ IN COMMAND-LINE OPTIONS
TEMP=$(getopt -o h:: --long help::,lists::,only-lists:: -- "$@")
eval set -- "$TEMP"
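# Note: the '::' suffixes mark the arguments as optional, so getopt emits an empty ''
# after each bare flag -- hence the 'shift 2' in the branches below.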
# extract options and their arguments into variables.
while true ; do
case "$1" in
-h|--help)
helpmessage="""
usage: load_production_data.sh [-h] [--lists] [--only-lists]
Load production data. By default this will import the main website database.
optional arguments:
-h, --help Show this help message and exit
--lists Also import the mailing list databases.
--only-lists Import only the mailing list databases, not the main web database.
"""
echo ""
echo "$helpmessage" ;
echo ""
exit 0
;;
--lists)
lists_option="yes" ; shift 2 ;;
--only-lists)
lists_option="yes" ; skip_web_option="yes" ; shift 2 ;;
--) shift ; break ;;
*) echo "Internal error!" ; exit 1 ;;
esac
done
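# Usage examples:
#   ./scripts/load_production_data.sh              # main web db only
#   ./scripts/load_production_data.sh --lists      # web db plus mailing list dbs
#   ./scripts/load_production_data.sh --only-lists # mailing list dbs only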
[ -f ".env" ] || { echo "Error: .env file not found"; exit 1; }
# shellcheck disable=SC1091
source .env
download_media_file() {
@@ -22,7 +66,7 @@ download_media_file() {
return 1;
}
local media_temp_dir=$(mktemp -d)
local -r media_temp_dir=$(mktemp -d)
trap 'rm -rf "$media_temp_dir"' RETURN
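# The RETURN trap removes the temp dir whenever this function exits, on success or failure.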
echo "Downloading all media files from bucket: $PROD_MEDIA_CONTENT_BUCKET_NAME to: $media_temp_dir"
@@ -53,35 +97,56 @@ download_media_file() {
}
download_latest_db_dump() {
if [ "$1" = "web_db" ]; then
DB_URL="PROD_DB_DUMP_URL"
DB_WILDCARD="PROD_DB_DUMP_FILE_WILDCARD"
DB_NAME=$(grep PGDATABASE .env | cut -d= -f2)
DB_USER=$(grep PGUSER .env | cut -d= -f2)
elif [ "$1" = "lists_core_db" ]; then
DB_URL="PROD_LISTS_CORE_DB_DUMP_URL"
DB_WILDCARD="PROD_LISTS_CORE_DB_DUMP_FILE_WILDCARD"
DB_NAME="lists_production_core"
DB_USER=$(grep PGUSER .env | cut -d= -f2)
elif [ "$1" = "lists_web_db" ]; then
DB_URL="PROD_LISTS_WEB_DB_DUMP_URL"
DB_WILDCARD="PROD_LISTS_WEB_DB_DUMP_FILE_WILDCARD"
DB_NAME="lists_production_web"
DB_USER=$(grep PGUSER .env | cut -d= -f2)
else
echo "Type of db dump not specified. Exiting"
exit 1
fi
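# Note: DB_URL and DB_WILDCARD hold the *names* of .env variables; the indirect
# expansions ${!DB_URL} and ${!DB_WILDCARD} below dereference them to their values.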
# download the latest database dump and restore it to the db
[ -z "$PROD_DB_DUMP_URL" ] && {
echo "Error: PROD_DB_DUMP_URL not set in .env";
[ -z "${!DB_URL}" ] && {
echo "Error: ${!DB_URL} not set in .env";
return 1;
}
[ -z "$PROD_DB_DUMP_FILE_WILDCARD" ] && {
echo "Error: PROD_DB_DUMP_FILE_WILDCARD not set in .env";
[ -z "${!DB_WILDCARD}" ] && {
echo "Error: ${!DB_WILDCARD} not set in .env";
return 1;
}
local db_temp_dir=$(mktemp -d)
local -r db_temp_dir=$(mktemp -d)
echo "db_temp_dir is $db_temp_dir"
trap 'rm -rf "$db_temp_dir"' RETURN
local dump_file_path=""
# not used: local dump_file_path=""
echo "Finding latest database dump..."
# Get a list of all dump files
gcloud storage ls "$PROD_DB_DUMP_URL$PROD_DB_DUMP_FILE_WILDCARD" > "$db_temp_dir/all_files.txt" || {
echo "Failed to list files at $PROD_DB_DUMP_URL";
gcloud storage ls "${!DB_URL}${!DB_WILDCARD}" > "$db_temp_dir/all_files.txt" || {
echo "Failed to list files at ${!DB_URL}";
return 1;
}
[ -s "$db_temp_dir/all_files.txt" ] || {
echo "No files found at $PROD_DB_DUMP_URL";
echo "No files found at ${!DB_URL}";
return 1;
}
grep "\.dump$" "$db_temp_dir/all_files.txt" | sort -r > "$db_temp_dir/dump_files.txt"
[ -s "$db_temp_dir/dump_files.txt" ] || {
echo "No .dump files found at $PROD_DB_DUMP_URL";
echo "No .dump files found at ${!DB_URL}";
return 1;
}
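# 'sort -r' puts the lexicographically greatest filename first; assuming the dump
# names embed a sortable timestamp, the first entry is the most recent dump.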
@@ -98,8 +163,6 @@ download_latest_db_dump() {
echo "Successfully downloaded database dump: $DUMP_FILENAME"
echo "Restoring database..."
DB_NAME=$(grep PGDATABASE .env | cut -d= -f2)
DB_USER=$(grep PGUSER .env | cut -d= -f2)
echo "Using database: $DB_NAME and user: $DB_USER"
echo "Stopping all services..."
@@ -117,7 +180,7 @@ download_latest_db_dump() {
docker compose exec db bash -c "psql -U $DB_USER -d template1 -c \"CREATE DATABASE $DB_NAME;\""
echo "Restoring database from dump..."
docker compose cp "$db_temp_dir/$DUMP_FILENAME" "db:/tmp/$DUMP_FILENAME"
docker compose exec db bash -c "pg_restore -U $DB_USER -d $DB_NAME -v --no-owner --no-privileges /tmp/$DUMP_FILENAME" || true
docker compose exec db bash -c "pg_restore -U $DB_USER -d $DB_NAME -v --no-owner --no-privileges /tmp/$DUMP_FILENAME"
# apply any migrations newer than our dumped database
docker compose exec web bash -c "./manage.py migrate"
# update the database to delete all rows from socialaccount_socialapp, which needs to be configured differently locally
@@ -130,10 +193,23 @@ download_latest_db_dump() {
return 0
}
download_latest_db_dump || {
echo "Failed to download and restore latest database dump";
exit 1;
}
if [ "${skip_web_option:-}" != "yes" ]; then
download_latest_db_dump web_db || {
echo "Failed to download and restore latest database dump";
exit 1;
}
fi
if [ "${lists_option:-}" = "yes" ]; then
download_latest_db_dump lists_web_db || {
echo "Failed to download and restore latest lists_web_db dump";
exit 1;
}
download_latest_db_dump lists_core_db || {
echo "Failed to download and restore latest lists_core_db dump";
exit 1;
}
fi
download_media_file || {
echo "Failed to download media files from bucket"