Merge pull request #188 from cppalliance/153-cleanup

Static content logging, docs, exception handling
2026-02-26 17:22:09 +00:00 · 2023-04-19 09:25:04 -07:00
parent 1af815d0f2 8d980068db
commit 9b86150f76
5 changed files with 163 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -62,13 +62,7 @@ Clone the content repo to your local machine, at the same level as this repo: ht

 ## Environment Variables 

-### `GITHUB_TOKEN`
-
-[Generate a new personal access token](https://github.com/settings/tokens) and replace the value for `GITHUB_TOKEN` in your `.env` file in order to connect to certain parts of the GitHub API. 
-
-### `ENVIRONMENT_NAME`
-
-Optional. Set a name for local development that will display in a banner in the Django Admin. 
+See [Environment Variables](docs/env_vars.md) for more information on environment variables. 

 ## Running the tests

--- a/core/boostrenderer.py
+++ b/core/boostrenderer.py
@@ -3,6 +3,7 @@ from botocore.exceptions import ClientError
 import json
 import os
 import re
+import structlog

 from django.conf import settings

@@ -13,13 +14,21 @@ from pygments.styles import get_style_by_name as get_style
 from pygments.lexers import get_lexer_by_name as get_lexer, guess_lexer
 from pygments.formatters.html import HtmlFormatter

+logger = structlog.get_logger()
+

 def get_content_from_s3(key=None, bucket_name=None):
    """
    Get content from S3. Returns the decoded file contents if able
    """
    if not key:
-        raise
+        logger.info(
+            "get_content_from_s3_no_key_provided",
+            key=key,
+            bucket_name=bucket_name,
+            function_name="get_content_from_s3",
+        )
+        raise ValueError("No key provided.")

    if not bucket_name:
        bucket_name = settings.STATIC_CONTENT_BUCKET_NAME
@@ -41,9 +50,24 @@ def get_content_from_s3(key=None, bucket_name=None):
            response = client.get_object(Bucket=bucket_name, Key=s3_key.lstrip("/"))
            file_content = response["Body"].read()
            content_type = response["ContentType"]
+            logger.info(
+                "get_content_from_s3_success",
+                key=key,
+                bucket_name=bucket_name,
+                s3_key=s3_key,
+                function_name="get_content_from_s3",
+            )
            return file_content, content_type
        except ClientError as e:
            # Log the error and continue with the next key in the list
+            logger.exception(
+                "get_content_from_s3_error",
+                key=key,
+                bucket_name=bucket_name,
+                s3_key=s3_key,
+                error=str(e),
+                function_name="get_content_from_s3",
+            )
            pass

        # Handle URLs that are directories looking for `index.html` files
@@ -57,9 +81,23 @@ def get_content_from_s3(key=None, bucket_name=None):
                return file_content, content_type
            except ClientError as e:
                # Log the error and continue with the next key in the list
+                logger.exception(
+                    "get_content+from_s3_client_error",
+                    key=key,
+                    bucket_name=bucket_name,
+                    s3_key=s3_key,
+                    error=str(e),
+                    function_name="get_content_from_s3",
+                )
                pass

    # Return None if no valid object is found
+    logger.info(
+        "get_content_from_s3_no_valid_object",
+        key=key,
+        bucket_name=bucket_name,
+        function_name="get_content_from_s3",
+    )
    return None


--- a/core/views.py
+++ b/core/views.py
@@ -1,5 +1,6 @@
 import os.path
 import re
+import structlog

 from django.conf import settings
 from django.http import Http404, HttpResponse
@@ -9,6 +10,8 @@ from django.views.generic import TemplateView, View
 from .boostrenderer import get_content_from_s3
 from .markdown import process_md

+logger = structlog.get_logger()
+

 class MarkdownTemplateView(TemplateView):
    template_name = "markdown_template.html"
@@ -60,13 +63,30 @@ class MarkdownTemplateView(TemplateView):

        # Avoids a TypeError from os.path.isfile if there is no path
        if not path:
+            logger.info(
+                "markdown_template_view_no_valid_path",
+                content_path=kwargs.get("content_path"),
+                status_code=404,
+            )
            raise Http404("Page not found")

        if not os.path.isfile(path):
+            logger.info(
+                "markdown_template_view_no_valid_file",
+                content_path=kwargs.get("content_path"),
+                path=path,
+                status_code=404,
+            )
            raise Http404("Post not found")

        context = {}
        context["frontmatter"], context["content"] = process_md(path)
+        logger.info(
+            "markdown_template_view_success",
+            content_path=kwargs.get("content_path"),
+            path=path,
+            status_code=200,
+        )
        return self.render_to_response(context)


@@ -76,12 +96,21 @@ class StaticContentTemplateView(View):
        Verifies the file and returns the raw static content from S3
        mangling paths using the stage_static_config.json settings
        """
-        print(kwargs.get("content_path"))
        result = get_content_from_s3(key=kwargs.get("content_path"))
        if not result:
+            logger.info(
+                "get_content_from_s3_view_no_valid_object",
+                key=kwargs.get("content_path"),
+                status_code=404,
+            )
            raise Http404("Page not found")

        content, content_type = result

        response = HttpResponse(content, content_type=content_type)
+        logger.info(
+            "get_content_from_s3_view_success",
+            key=kwargs.get("content_path"),
+            status_code=response.status_code,
+        )
        return response
--- a/docs/env_vars.md
+++ b/docs/env_vars.md
@@ -0,0 +1,37 @@
+# Environment Variables
+
+This project uses environment variables to configure certain aspects of the application. 
+
+## `GITHUB_TOKEN`
+
+- Used to authenticate with the GitHub API when making requests. 
+- For **local development**, you should set this variable to a valid personal access token that has the necessary permissions to access the relevant repositories. [Generate a new personal access token](https://github.com/settings/tokens) and replace the value for `GITHUB_TOKEN` in your `.env` file in order to connect to certain parts of the GitHub API. 
+- In **deployed environments**, this should be set to a valid access token associated with the GitHub organization. Edit `kube/boost/values.yaml` (or the environment-specific yaml file) to change this value. 
+
+
+## `ENVIRONMENT_NAME`
+
+- Used to indicate the name of the environment where the application is running. 
+- For **local development**, set this to whatever you want. 
+- In **deployed environments**, change the value by editing `kube/boost/values.yaml` (or the environment-specific yaml file). 
+
+
+## `STATIC_CONTENT_AWS_ACCESS_KEY_ID`
+
+- Used to authenticate with the Amazon Web Services (AWS) API when accessing static content from a specified bucket. 
+- For **local development**, obtain a valid value from the Boost team.
+- In **deployed environments**, the valid value is set as a kube secret and is defined in `kube/boost/values.yaml` (or the environment-specific yaml file).
+
+
+## `STATIC_CONTENT_AWS_SECRET_ACCESS_KEY`
+
+- Used to authenticate with the Amazon Web Services (AWS) API when accessing static content from a specified bucket. 
+- For **local development**, obtain a valid value from the Boost team.
+- In **deployed environments**, the valid value is set as a kube secret and is defined in `kube/boost/values.yaml` (or the environment-specific yaml file).
+
+
+## `STATIC_CONTENT_BUCKET_NAME`
+
+- Specifies the name of the Amazon S3 bucket where static content is stored.
+- For **local development**, obtain a valid value from the Boost team.
+- In **deployed environments**, the valid value is set in `kube/boost/values.yaml` (or the environment-specific yaml file).
--- a/docs/static_content.md
+++ b/docs/static_content.md
@@ -0,0 +1,56 @@
+# Retrieving Static Content from the Boost Amazon S3 Bucket 
+
+The `StaticContentTemplateView` class (in the `core/` app) is a Django view that handles requests for static content. 
+
+Its URL path is the very last path in our list of URL patterns (see `config/urls.py`) because it functions as the fallback URL pattern. If a user enters a URL that doesn't match anything else defined in our URL patterns, this view will attempt to retrieve the request as static content from S3 using the URL path.
+
+The `StaticContentTemplateView` calls S3 using the URL pattern and generates a list of potential keys to check. It then checks the specified S3 bucket for each of those keys and returns the first match it finds, along with the file content type. Passing the content type with the bucket contents allows the content to be delivered appropriately to the user (so HTML files will be rendered as HTML, etc.).
+
+Boost uses the AWS SDK for Python (boto3) to connect to an S3 bucket and retrieve the static content. If no bucket name is provided, our process uses the `STATIC_CONTENT_BUCKET_NAME` setting from the Django project settings.
+
+## How we decide which S3 keys to try 
+
+The JSON config file `{env}_static_config.json` is used to map site paths to corresponding paths in the Amazon S3 bucket where static content is stored. This mapping is used to create shortcuts to static content files in the S3 bucket, which can be accessed using URLs that correspond to the site paths.
+
+The `{env}_static_config.json` file is a list of objects, where each object represents a mapping between a site path and an S3 path. Each object has two properties: `site_path` and `s3_path`.
+
+The `site_path` property is the path to the static content file as it appears in the web application. For example, if the static content file is located at `/static/js/main.js`, the site path would be `/static/js/`.
+
+The `s3_path` property is the path to the static content file in the S3 bucket. For example, if the S3 bucket is named `my-bucket` and the static content file is located at `my-bucket/site/develop/js/main.js`, the S3 path would be `/site/develop/js/`.
+
+When a request is made to a URL that corresponds to a site path, the `get_content_from_s3()` function looks up the site path in the `{env}_static_config.json` file to find the corresponding S3 path. It then uses the S3 path to retrieve the static content from the S3 bucket.
+
+Take a look at this sample `{env}_static_config.json` file: 
+
+```javascript
+[
+    {
+        "site_path": "/develop/libs/",
+        "s3_path": "/site/develop/libs/"
+    },
+    {
+        "site_path": "/develop/doc/",
+        "s3_path": "/site/develop/doc/html/"
+    },
+    {
+        "site_path": "/",
+        "s3_path": "/site/develop/"
+    }
+]
+```
+
+**Example 1**: If the URL request is for `/develop/libs/index.html`, the S3 keys that we would try are:
+
+- `/site/develop/libs/index.html`
+  
+**Example 2**: If the URL request is for `/develop/doc/index.html`, the S3 keys that the function would try are:
+
+- `/site/develop/doc/html/index.html`
+- `/site/develop/doc/index.html`
+  
+**Example 3**: If the URL request is for `/index.html`, the S3 keys that the function would try are:
+
+- `/site/develop/index.html`
+- `/site/index.html`
+
+We first try to retrieve the static content using the exact S3 key specified in the site-to-S3 mapping. If we can't find the content using that key, we will try alternative S3 keys based on the `site_path` and `s3_path` properties in the `{env}_static_config.json` file.