import json
from json.decoder import JSONDecodeError

import requests
import structlog
from bs4 import BeautifulSoup
from jsoncomment import JsonComment

from django.conf import settings

from core.asciidoc import convert_adoc_to_html
from core.boostrenderer import get_file_data, get_s3_client, does_s3_key_exist
from core.htmlhelper import modernize_release_notes
from core.models import RenderedContent

from .models import Version, VersionFile

logger = structlog.get_logger(__name__)

session = requests.Session()


def get_archives_download_uris_for_release(release: str = "1.81.0") -> list:
    """Get the download information for a Boost release from the Boost Archives.

    Args:
        release (str): The Boost release to get download information for.
            Defaults to "1.81.0".

    Returns:
        list: A list of URLs to download the release data from.

    Raises:
        requests.exceptions.HTTPError: If the archives listing request returns
            an error status (logged before being re-raised).
    """
    # A tuple so the whole set can be handed to ``str.endswith`` in one call.
    file_extensions = (".tar.bz2", ".tar.gz", ".7z", ".zip")
    file_name_excludes = ["_rc"]

    if "beta" in release:
        release_path = f"{settings.ARCHIVES_URL}beta/{release}/source/"
    else:
        release_path = f"{settings.ARCHIVES_URL}release/{release}/source/"

    try:
        resp = session.get(release_path)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error(
            "get_archives_releases_list_error", exc_msg=str(e), url=release_path
        )
        raise

    # Get the list of archives downloads for this release
    soup = BeautifulSoup(resp.text, "html.parser")
    uris = []
    for a in soup.find_all("a"):
        uri = a.get("href")
        # Anchors without an href yield None; skip them instead of crashing
        # on ``None.endswith``.
        if not uri:
            continue
        # Only include the download links with valid file extensions.
        if not uri.endswith(file_extensions):
            continue
        # Exclude release candidates
        if any(exclude in uri for exclude in file_name_excludes):
            continue
        uris.append(f"{release_path}{uri}")

    return uris


def get_artifactory_download_uris_for_release(release: str = "1.81.0") -> list:
    """Get the download information for a Boost release from the Boost artifactory.

    Args:
        release (str): The Boost release to get download information for.
            Defaults to "1.81.0".

    Returns:
        list: A list of URLs to download the release data from.

    Raises:
        requests.exceptions.HTTPError: If the artifactory listing request
            returns an error status (logged before being re-raised).
    """
    file_extensions = (".tar.bz2", ".tar.gz", ".7z", ".zip")

    beta = "beta" in release
    if beta:
        release_path = f"{settings.ARTIFACTORY_URL}beta/{release}/source/"
    else:
        release_path = f"{settings.ARTIFACTORY_URL}release/{release}/source/"

    try:
        resp = session.get(release_path)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error(
            "get_artifactory_releases_list_error", exc_msg=str(e), url=release_path
        )
        raise

    # Get the list of artifactory downloads for this release
    children = resp.json()["children"]
    # The trailing slash is dropped before joining; presumably each child
    # "uri" already starts with "/" -- confirm against the artifactory API.
    base_uri = release_path.rstrip("/")

    uris = []
    for child in children:
        uri = child["uri"]

        # The directory may include the release candidates and beta releases;
        # skip those unless this is a beta release. Checksum ".json" sidecar
        # entries are skipped as well.
        if ("beta" in uri and not beta) or "rc" in uri or uri.endswith(".json"):
            continue

        if uri.endswith(file_extensions):
            uris.append(f"{base_uri}{uri}")

    return uris


def get_archives_download_data(url):
    """Get the download information for a Boost release from the Boost Archives.

    Args:
        url (str): URL of a release archive file. The metadata is read from
            the same URL with ``.json`` appended.

    Returns:
        dict: ``url``, ``operating_system`` ("Unix" when ".tar" is in the URL,
        otherwise "Windows"), ``checksum`` (the ``sha256`` value from the
        metadata) and ``display_name`` (the last path segment of ``url``).

    Raises:
        requests.exceptions.HTTPError: If the metadata request returns an
            error status.
        ValueError: If the metadata response cannot be parsed as JSON.
    """
    # Append .json to the end of the URL to get the download information.
    json_url = f"{url}.json"

    try:
        resp = session.get(json_url)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error("get_archives_download_data_error", exc_msg=str(e), url=json_url)
        raise

    try:
        # Parse the JSON response, which sometimes has trailing commas.
        json_parser = JsonComment(json)
        resp_json = json_parser.loads(resp.text)
    except JSONDecodeError as e:
        logger.error("get_archives_download_data_error", url=json_url)
        # Chain the decode error so the root cause stays in the traceback.
        raise ValueError(f"Invalid response from {json_url}") from e

    return {
        "url": url,
        "operating_system": "Unix" if ".tar" in url else "Windows",
        "checksum": resp_json["sha256"],
        "display_name": url.split("/")[-1],
    }


def get_artifactory_download_data(url):
    """Get the download information for a Boost release from the Boost artifactory.

    Args:
        url (str): URL of the artifactory entry describing one release file.

    Returns:
        dict: ``url`` (the ``downloadUri`` from the response),
        ``operating_system`` ("Unix" when ".tar" is in ``url``, otherwise
        "Windows"), ``checksum`` (the sha256 checksum from the response) and
        ``display_name`` (the last path segment of ``url``).

    Raises:
        requests.exceptions.HTTPError: If the request returns an error status.
        ValueError: If the response lacks ``downloadUri`` or ``checksums``.
    """
    try:
        resp = session.get(url)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error("get_artifactory_releases_detail_error", exc_msg=str(e), url=url)
        raise

    # Parse the body once instead of re-decoding it for every field access.
    data = resp.json()

    if "downloadUri" not in data or "checksums" not in data:
        logger.error("get_artifactory_releases_detail_error", url=url)
        raise ValueError(f"Invalid response from {url}")

    return {
        "url": data.get("downloadUri"),
        "operating_system": "Unix" if ".tar" in url else "Windows",
        "checksum": data["checksums"]["sha256"],
        "display_name": url.split("/")[-1],
    }


def get_release_notes_for_version_s3(version_pk):
    """Retrieve the adoc release notes for a version from S3.

    Looks for ``release-notes/master/<slug>.adoc`` first and, if that key does
    not exist, for the same name with its last ``_``-separated component
    replaced by ``x``.

    Args:
        version_pk: Primary key of the Version to look up.

    Returns:
        str: The decoded file content, or "" when neither key exists.

    Raises:
        Version.DoesNotExist: If no Version has the given primary key.
    """
    # TODO: this and the github function have duplication (including of this comment!),
    #   and are not extensible if we encounter additional filename patterns in the
    #   future; we should refactor.
    try:
        version = Version.objects.get(pk=version_pk)
    except Version.DoesNotExist:
        logger.info(
            "get_release_notes_for_version_s3_error_version_not_found",
            version_pk=version_pk,
        )
        raise

    # get_content_from_s3 only works for keys with matching keys
    # in the STATIC_CONTENT_MAPPING. Use get_file_data directly instead.
    # Note we are using the non-beta slug since release notes for beta
    # versions are named without beta suffix.
    filename = version.non_beta_slug.replace("-", "_")
    s3_client = get_s3_client()
    bucket_name = settings.STATIC_CONTENT_BUCKET_NAME

    primary_key = f"release-notes/master/{filename}.adoc"
    fallback_key = f"release-notes/master/{filename.rsplit('_', 1)[0] + '_x'}.adoc"

    response = None
    if does_s3_key_exist(s3_client, bucket_name, primary_key):
        response = get_file_data(s3_client, bucket_name, primary_key)
    elif does_s3_key_exist(s3_client, bucket_name, fallback_key):
        response = get_file_data(s3_client, bucket_name, fallback_key)
    else:
        logger.info(f"no release notes found for {filename=}")

    return response["content"].decode() if response else ""


def get_release_notes_for_version_github(version_pk):
    """Retrieve the release notes for a given version.

    We retrieve the rendered release notes for older versions.

    Args:
        version_pk: Primary key of the Version to look up.

    Returns:
        bytes: The raw body of the release notes HTML page.

    Raises:
        Version.DoesNotExist: If no Version has the given primary key.
        requests.exceptions.HTTPError: If the final request (the primary URL,
            or the ``_x`` fallback after a 404) returns an error status.
    """
    # TODO: this and the S3 function have duplication (including of this comment!),
    #   and are not extensible if we encounter additional filename patterns in the
    #   future; we should refactor.
    try:
        version = Version.objects.get(pk=version_pk)
    except Version.DoesNotExist:
        logger.info(
            "get_release_notes_for_version_error_version_not_found",
            version_pk=version_pk,
        )
        raise

    base_url = (
        "https://raw.githubusercontent.com/boostorg/website/master/users/history/"
    )
    # Note we are using the non-beta slug since release notes for beta
    # versions are named without beta suffix.
    base_filename = (
        f"{version.non_beta_slug.replace('boost', 'version').replace('-', '_')}"
    )
    url = f"{base_url}{base_filename}.html"

    # Tracks the URL of the most recent request so the error log names the
    # one that actually failed.
    requested_url = url
    try:
        response = session.get(url)
        if response.status_code == 404:
            # Some beta release notes end in _x.html instead of _0.html; try that.
            fallback_filename = base_filename.rsplit("_", 1)[0] + "_x"
            requested_url = f"{base_url}{fallback_filename}.html"
            response = session.get(requested_url)
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error(
            "get_release_notes_for_version_http_error",
            exc_msg=str(e),
            url=requested_url,
            version_pk=version_pk,
        )
        raise

    return response.content


def get_release_notes_for_version(version_pk):
    """Get the release notes content.

    Tries S3 first, and fallback to old github release notes if not found in S3.

    Args:
        version_pk: Primary key of the Version to look up.

    Returns:
        tuple: ``(content, processed_content, content_type)``. For S3 content
        the original is the adoc text, the processed value is its HTML
        conversion and the type is "text/asciidoc"; for the github fallback
        the original is the raw response body, the processed value is the
        modernized HTML and the type is "text/html".
    """
    content = get_release_notes_for_version_s3(version_pk)
    if content:
        processed_content = convert_adoc_to_html(content)
        content_type = "text/asciidoc"
    else:
        content = get_release_notes_for_version_github(version_pk)
        processed_content = process_release_notes(content)
        content_type = "text/html"

    return content, processed_content, content_type


def get_in_progress_release_notes():
    """Fetch the in-progress release notes.

    Returns:
        bytes: The raw body fetched from
        ``settings.RELEASE_NOTES_IN_PROGRESS_URL``.

    Raises:
        requests.exceptions.HTTPError: If the request returns an error status
            (logged before being re-raised).
    """
    try:
        response = session.get(settings.RELEASE_NOTES_IN_PROGRESS_URL)
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        logger.error(
            "get_in_progress_release_notes_error",
            exc_msg=str(e),
            url=settings.RELEASE_NOTES_IN_PROGRESS_URL,
        )
        raise

    return response.content


def process_release_notes(content):
    """Return ``content`` passed through ``modernize_release_notes``."""
    return modernize_release_notes(content)


def store_release_notes_for_version(version_pk):
    """Check S3 and then github for release notes and store them in RenderedContent.

    Args:
        version_pk: Primary key of the Version whose notes should be stored.

    Returns:
        RenderedContent: The created or updated record, keyed by the
        version's ``release_notes_cache_key``.

    Raises:
        Version.DoesNotExist: If no Version has the given primary key.
    """
    # Get the version
    try:
        version = Version.objects.get(pk=version_pk)
    except Version.DoesNotExist:
        logger.info(
            "store_release_notes_for_version_error_version_not_found",
            version_pk=version_pk,
        )
        # Re-raise the original exception so its message and traceback are
        # preserved (same exception type as before).
        raise

    content, processed_content, content_type = get_release_notes_for_version(version_pk)

    # Save the result to the rendered content model with the version cache key
    rendered_content, _ = RenderedContent.objects.update_or_create(
        cache_key=version.release_notes_cache_key,
        defaults={
            "content_type": content_type,
            "content_original": content,
            "content_html": processed_content,
        },
    )
    logger.info(
        "store_release_notes_for_version_success",
        rendered_content_pk=rendered_content.id,
        version_pk=version_pk,
    )
    return rendered_content


def store_release_notes_for_in_progress():
    """Retrieve and store the in-progress release notes.

    Returns:
        RenderedContent: The created or updated record, keyed by
        ``settings.RELEASE_NOTES_IN_PROGRESS_CACHE_KEY``.
    """
    # Get the release notes content
    content = get_in_progress_release_notes()
    stripped_content = process_release_notes(content)

    # Save the result to the rendered content model with the key
    rendered_content, _ = RenderedContent.objects.update_or_create(
        cache_key=settings.RELEASE_NOTES_IN_PROGRESS_CACHE_KEY,
        defaults={
            "content_type": "text/html",
            "content_original": content,
            "content_html": stripped_content,
        },
    )
    logger.info(
        "store_release_notes_in_progress_success",
        rendered_content_pk=rendered_content.id,
    )
    return rendered_content


def store_release_downloads_for_version(version, release_data):
    """Store the release download information for a Version instance.

    Each entry is matched on ``(version, checksum)``; an existing VersionFile
    is updated in place, otherwise a new one is created.

    Args:
        version (Version): The Version instance to store the download
            information for.
        release_data (list): A list of dictionaries containing the download
            information for the release. Each dictionary contains the
            following keys:
            - url (str): The URL to download the release from.
            - operating_system (str): The operating system the release is for.
            - checksum (str): The sha256 checksum for the release.
            - display_name (str): The name of the release file.
    """
    for data in release_data:
        VersionFile.objects.update_or_create(
            version=version,
            checksum=data["checksum"],
            defaults=dict(
                url=data["url"],
                operating_system=data["operating_system"],
                display_name=data["display_name"],
            ),
        )