#!/usr/bin/env -S python3 -u

# Copy files from jfrog to the local machine on a frequent cron schedule.
#
# Instructions
#
# This script reads the file lists /tmp/boostarchivesinfo/filelist.txt and
# /tmp/boostarchivesinfo/vsbinaries_filelist.txt.
#
# Each line of a file list should contain the path to an artifactory file in this format:
# main/release/1.84.0/source/boost_1_84_0.tar.bz2
# main/release/1.84.0/source/boost_1_84_0.tar.bz2.json
#
# install dotenv:
# pip3 install python-dotenv
#
# install rclone:
# wget https://downloads.rclone.org/v1.66.0/rclone-v1.66.0-linux-amd64.deb; dpkg -i rclone-v1.66.0-linux-amd64.deb
#
# create ${HOME}/.config/rclone/rclone.conf
# [remote1]
# type = s3
# provider = AWS
# env_auth = true
# region = us-east-2
#
# create ${HOME}/.aws/credentials and config, with a production profile
#
# Create a .env file in the same directory. Example:
# JFROG_URL="https://boostorg.jfrog.io/artifactory/"
# JFROG_USERNAME="_"
# JFROG_PASSWORD="_"
#
# Add a per-minute cron task:
#
# * * * * * ${HOME}/scripts/jfrog-file-sync.py > /tmp/jfrog-file-sync-output.txt 2>&1
#
# Run either s3-file-sync.py or jfrog-file-sync.py, but not both.
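#
# Because the job runs every minute, a slow transfer could overlap the next
# run. One optional guard (not part of this script; flock ships with
# util-linux, and the lock-file path here is only an example) is to wrap the
# cron entry:
#
# * * * * * flock -n /tmp/jfrog-file-sync.lock ${HOME}/scripts/jfrog-file-sync.py > /tmp/jfrog-file-sync-output.txt 2>&1
#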

import os
import pathlib
import re
import subprocess

from dotenv import load_dotenv

load_dotenv()

upload_to_s3 = True
s3_archives_bucket = "boost-archives"
debug = 1
source_file_lists = [
    "/tmp/boostarchivesinfo/filelist.txt",
    "/tmp/boostarchivesinfo/vsbinaries_filelist.txt",
]
local_copy_of_archives = "/drive2/boostorg"
jfrog_executable = "/usr/bin/jfrog"

JFROG_URL = os.getenv("JFROG_URL")
JFROG_USERNAME = os.getenv("JFROG_USERNAME")
JFROG_PASSWORD = os.getenv("JFROG_PASSWORD")

os.chdir(local_copy_of_archives)

# check hostname
result = subprocess.run(["hostname", "-f"], check=True, capture_output=True, text=True)
hostname = result.stdout.strip()

for source_file_list in source_file_lists:
    if not os.path.isfile(jfrog_executable):
        if debug > 0:
            print("The jfrog executable is missing in jfrog-file-sync.py. Exiting.")
        exit(1)
    if not os.path.isdir(os.path.dirname(source_file_list)):
        if debug > 0:
            print(
                "The directory of source_file_list is missing. Perhaps there are no files to sync right now. Skipping."
            )
        continue
    if not os.path.isfile(source_file_list):
        if debug > 0:
            print(
                "The source_file_list is missing. Perhaps there are no files to sync right now. Skipping."
            )
        continue
    with open(source_file_list, "r") as f:
        data = f.read().splitlines()

    # Remove source_file_list so it won't be processed again next time
    pathlib.Path(source_file_list).unlink()

    for file in data:
        file = file.strip()
        # Sanitize the path: it must be non-empty, contain only ordinary
        # characters, not contain "..", and not start with an absolute "/".
        if (
            file
            and re.match("^[a-zA-Z0-9_/.-]+$", file)
            and ".." not in file
            and not file.startswith("/")
        ):
            # Example download:
            # cd /drive2/boostorg && /usr/bin/jfrog rt download --url=https://boostorg.jfrog.io/artifactory/ --user= --password= --detailed-summary --flat=false --recursive=true main/ "./" > /tmp/jfrog-all.log 2>&1
            result = subprocess.run(
                [
                    jfrog_executable,
                    "rt",
                    "download",
                    f"--url={JFROG_URL}",
                    f"--user={JFROG_USERNAME}",
                    f"--password={JFROG_PASSWORD}",
                    "--detailed-summary",
                    "--flat=false",
                    "--recursive=true",
                    file,
                    "./",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
            if debug > 0:
                print(result)
            if upload_to_s3 and hostname == "brorigin1.cpp.al":
                archivePathLocal = re.sub("^main/", "", file)
                archivePathRemote = (
                    "remote1:" + s3_archives_bucket + "/" + archivePathLocal
                )
                result = subprocess.run(
                    "export AWS_PROFILE=%s; rclone -v --s3-no-check-bucket copyto --checksum %s %s"
                    % ("production", archivePathLocal, archivePathRemote),
                    check=True,
                    shell=True,
                    text=True,
                )
                if debug > 0:
                    print(result)
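
# To spot-check a synced directory by hand, rclone's "check" command compares
# the local copy against the bucket. The release path below is only an
# example; it assumes the remote1 remote and production profile set up above:
#
# AWS_PROFILE=production rclone check /drive2/boostorg/release/1.84.0/source remote1:boost-archives/release/1.84.0/source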