Skip to content

Commit

Permalink
Changes required to deploy the image using MWAA
Browse files Browse the repository at this point in the history
These open source Docker images will be used both externally by our
customers willing to experiment with the images in native Docker and
internally within an Amazon MWAA setup (which relies on Fargate.) This
commit involves multiple small changes to make this possible:

- Introduced a `/healthcheck.sh` script which is used by Fargate to
  monitor health status. This script currently always returns success
  status (0 code) just to make the integration possible. In the future,
  we need to:
  - Improve this script to do some real checks.
  - Move this script to a better location (scripts shouldn't be placed
    at the root.)
- Supported reading database credentials from a JSON-formatted
  environment variable, `MWAA__DB__CREDENTIALS`, containing the username
  and password. This is needed because Amazon MWAA employs Secrets
  Manager to pass the credentials safely to the Fargate container in a
  JSON-formatted object.

During the work on this, I temporarily downgraded the Airflow version to
2.7.2 since this is a version we internally support, which should make the
testing easier.
  • Loading branch information
rafidka committed Apr 22, 2024
1 parent cc4c642 commit f8c1a93
Show file tree
Hide file tree
Showing 12 changed files with 119 additions and 20 deletions.
11 changes: 10 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
Expand All @@ -17,5 +26,5 @@
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./venv/bin/python"
"python.defaultInterpreterPath": "./.venv/bin/python"
}
11 changes: 10 additions & 1 deletion images/airflow/2.8.0/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
"**/Thumbs.db": true,
"**/venv": true
},
"files.watcherExclude": {
"**/.DS_Store": true,
"**/.conda": true,
"**/.git": true,
"**/.ruff_cache": true,
"**/.venv": true,
"**/Thumbs.db": true,
"**/venv": true
},
"search.exclude": {
"**/.DS_Store": true,
"**/.conda": true,
Expand All @@ -17,5 +26,5 @@
"**/Thumbs.db": true,
"**/venv": true
},
"python.defaultInterpreterPath": "./venv/bin/python"
"python.defaultInterpreterPath": "./.venv/bin/python"
}
15 changes: 12 additions & 3 deletions images/airflow/2.8.0/Dockerfile.base.j2
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0
ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.8.0/constraints-3.11.txt"

# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image
# within Amazon MWAA since 2.7.2 is a version we support.
ENV AIRFLOW_VERSION=2.7.2
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV AIRFLOW_VERSION=2.8.0
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7

Expand Down Expand Up @@ -96,8 +100,13 @@ EXPOSE 8080

ENV PATH=${PATH_AIRFLOW_USER}
ENV PYTHONPATH="/python"
ENV PYTHONUNBUFFERED=1

WORKDIR ${AIRFLOW_USER_HOME}

# Copy python files.
COPY ./python /python

# TODO Move this to the bin folder under airflow's home folder.
COPY healthcheck.sh /healthcheck.sh
RUN chmod +x /healthcheck.sh
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/Dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.342796
# This file was generated on 2024-02-12 01:56:33.029839
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/Dockerfiles/Dockerfile-dev
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.334771
# This file was generated on 2024-02-12 01:56:33.021778
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.345378
# This file was generated on 2024-02-12 01:56:33.032499
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.337537
# This file was generated on 2024-02-12 01:56:33.024518
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.348042
# This file was generated on 2024-02-12 01:56:33.035092
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.340210
# This file was generated on 2024-02-12 01:56:33.027225
#

FROM amazon-mwaa/airflow:2.8.0-base
Expand Down
18 changes: 13 additions & 5 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-01-31 20:02:12.330369
# This file was generated on 2024-02-12 01:56:33.018473
#

FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0
ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.8.0/constraints-3.11.txt"

# Temporarily downgrading to 2.7.2 to make it easier to test using it internally.
ENV AIRFLOW_VERSION=2.7.2
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV AIRFLOW_VERSION=2.8.0
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7

Expand Down Expand Up @@ -108,8 +111,13 @@ EXPOSE 8080

ENV PATH=${PATH_AIRFLOW_USER}
ENV PYTHONPATH="/python"
ENV PYTHONUNBUFFERED=1

WORKDIR ${AIRFLOW_USER_HOME}

# Copy python files.
COPY ./python /python
COPY ./python /python

# TODO Move this to the bin folder under airflow's home folder.
COPY healthcheck.sh /healthcheck.sh
RUN chmod +x /healthcheck.sh
4 changes: 4 additions & 0 deletions images/airflow/2.8.0/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Placeholder container health check: it performs no real checks yet, it only
# prints a message and exits with status 0 so the caller always sees success.
# TODO Move this to the bin folder under airflow's home folder.
echo "Health check succeeded."
exit 0
68 changes: 64 additions & 4 deletions images/airflow/2.8.0/python/mwaa/config/database.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,68 @@
import os
import json
from operator import itemgetter
from typing import Tuple


def get_db_credentials() -> Tuple[str, str]:
    """
    Retrieve the database credentials from environment variables.

    The credentials are looked up in two possible locations, in this order:

    1. MWAA__DB__CREDENTIALS: a JSON string containing "username" and
       "password" keys.
    2. MWAA__DB__POSTGRES_USER and MWAA__DB__POSTGRES_PASSWORD: separate
       environment variables for the username and the password. Both must be
       set for this source to be used.

    If MWAA__DB__CREDENTIALS is present it always takes precedence, even when
    the two separate variables are set as well.

    Returns:
        Tuple[str, str]: A tuple containing the PostgreSQL username and
        password.

    Raises:
        RuntimeError: If neither MWAA__DB__CREDENTIALS nor the pair
            MWAA__DB__POSTGRES_USER / MWAA__DB__POSTGRES_PASSWORD is set.
        json.JSONDecodeError: If MWAA__DB__CREDENTIALS is set but is not valid
            JSON.
        KeyError: If the JSON object in MWAA__DB__CREDENTIALS lacks the
            "username" or "password" key.

    Example:
        Ensure the required environment variables are set before calling the
        function, then retrieve the credentials as follows (avoid printing or
        logging the password):

        >>> user, password = get_db_credentials()
        >>> print(f"Username: {user}")
    """

    if "MWAA__DB__CREDENTIALS" in os.environ:
        # Only the source of the credentials is reported, never their values.
        print("Reading database credentials from MWAA__DB__CREDENTIALS.")
        db_secrets = json.loads(os.environ["MWAA__DB__CREDENTIALS"])
        postgres_user = db_secrets["username"]
        postgres_password = db_secrets["password"]
    elif (
        "MWAA__DB__POSTGRES_USER" in os.environ
        and "MWAA__DB__POSTGRES_PASSWORD" in os.environ
    ):
        print(
            "Reading database credentials from MWAA__DB__POSTGRES_USER/"
            "MWAA__DB__POSTGRES_PASSWORD environment variables."
        )
        postgres_user = os.environ["MWAA__DB__POSTGRES_USER"]
        postgres_password = os.environ["MWAA__DB__POSTGRES_PASSWORD"]
    else:
        raise RuntimeError(
            "Couldn't find database credentials in environment variables. "
            "Please pass them either in MWAA__DB__CREDENTIALS as a JSON with "
            "'username' and 'password' fields, or in MWAA__DB__POSTGRES_USER "
            "and MWAA__DB__POSTGRES_PASSWORD."
        )
    return postgres_user, postgres_password


def get_db_connection_string() -> str:
Expand All @@ -11,18 +74,15 @@ def get_db_connection_string() -> str:
env_vars_names = [
"MWAA__DB__POSTGRES_HOST",
"MWAA__DB__POSTGRES_PORT",
"MWAA__DB__POSTGRES_USER",
"MWAA__DB__POSTGRES_PASSWORD",
"MWAA__DB__POSTGRES_DB",
]
try:
(
postgres_host,
postgres_port,
postgres_user,
postgres_password,
postgres_db,
) = itemgetter(*env_vars_names)(os.environ)
(postgres_user, postgres_password) = get_db_credentials()
except Exception as e:
raise RuntimeError(
"One or more of the required environment variables for "
Expand Down

0 comments on commit f8c1a93

Please sign in to comment.