diff --git a/.github/actions/configure_dbt_environment/action.yaml b/.github/actions/configure_dbt_environment/action.yaml
new file mode 100644
index 000000000..b9415701b
--- /dev/null
+++ b/.github/actions/configure_dbt_environment/action.yaml
@@ -0,0 +1,32 @@
+name: Configure dbt environment
+description: Set environment variables based on the active dbt project (CI or prod)
+runs:
+  using: composite
+  steps:
+    - name: Configure dbt environment
+      run: |
+        # TARGET selects the dbt target, CACHE_KEY names the manifest cache
+        # entry, and HEAD_REF is read by the generate_schema_name macro to
+        # prefix CI schemas.
+        if [[ $GITHUB_REF_NAME == 'master' ]]; then
+          echo "On master branch, setting dbt env to prod"
+          {
+            echo "TARGET=prod";
+            echo "CACHE_KEY=master";
+          } >> "$GITHUB_ENV"
+        elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then
+          echo "On data catalog branch, setting dbt env to CI"
+          {
+            echo "TARGET=ci";
+            echo "CACHE_KEY=data-catalog";
+            echo "HEAD_REF=data-catalog";
+          } >> "$GITHUB_ENV"
+        else
+          echo "On pull request branch, setting dbt env to CI"
+          {
+            echo "TARGET=ci";
+            echo "CACHE_KEY=$GITHUB_HEAD_REF";
+            echo "HEAD_REF=$GITHUB_HEAD_REF"
+          } >> "$GITHUB_ENV"
+        fi
+      shell: bash
diff --git a/.github/actions/install_dbt_requirements/action.yaml b/.github/actions/install_dbt_requirements/action.yaml
new file mode 100644
index 000000000..6086b0caa
--- /dev/null
+++ b/.github/actions/install_dbt_requirements/action.yaml
@@ -0,0 +1,34 @@
+name: Install dbt dependencies
+description: Installs Python and dbt requirements for a workflow
+inputs:
+  dbt_project_dir:
+    description: Path to the directory containing the dbt project.
+    required: false
+    default: ./dbt
+  requirements_file_path:
+    description: Path to Python requirements file.
+    required: false
+    default: ./dbt/requirements.txt
+runs:
+  using: composite
+  steps:
+    - name: Setup python
+      uses: actions/setup-python@v4
+      with:
+        python-version: 3.x
+        cache: pip
+
+    - name: Install python requirements
+      run: python -m pip install -r "${{ inputs.requirements_file_path }}"
+      shell: bash
+
+    - name: Cache dbt requirements
+      uses: actions/cache@v3
+      with:
+        path: ${{ inputs.dbt_project_dir }}/dbt_packages
+        key: dbt-${{ hashFiles(format('{0}/packages.yml', inputs.dbt_project_dir)) }}
+
+    - name: Install dbt requirements
+      run: dbt deps
+      working-directory: ${{ inputs.dbt_project_dir }}
+      shell: bash
diff --git a/.github/actions/load_environment_variables/action.yaml b/.github/actions/load_environment_variables/action.yaml
new file mode 100644
index 000000000..fe112f9a8
--- /dev/null
+++ b/.github/actions/load_environment_variables/action.yaml
@@ -0,0 +1,18 @@
+name: Load environment variables
+description: Configures environment variables for a workflow
+inputs:
+  env_var_file_path:
+    description: |
+      File path to variable file or directory.
+      Defaults to ./.github/variables/* if none specified
+      and runs against each file in that directory.
+    required: false
+    default: ./.github/variables/*
+runs:
+  using: composite
+  steps:
+    # Use sed to strip comment lines (lines whose first non-blank character
+    # is "#"). Values that merely contain "#" are kept. The path is left
+    # unquoted on purpose so the default glob expands to every file.
+    - run: sed '/^[[:space:]]*#/d' ${{ inputs.env_var_file_path }} >> "$GITHUB_ENV"
+      shell: bash
diff --git a/.github/scripts/cleanup_dbt_resources.sh b/.github/scripts/cleanup_dbt_resources.sh
new file mode 100755
index 000000000..68e00c616
--- /dev/null
+++ b/.github/scripts/cleanup_dbt_resources.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Clean up dbt resources created by a CI run or by local development.
+#
+# Takes one argument representing the target environment to clean up,
+# one of `dev` or `ci`. E.g.:
+#
+#   ./cleanup_dbt_resources.sh dev
+#
+# Assumes that jq is installed and available on the caller's path.
+set -euo pipefail
+
+if [[ "$#" -eq 0 ]]; then
+    echo "Missing first argument representing dbt target" >&2
+    exit 1
+fi
+
+if [[ "$1" == "prod" ]]; then
+    echo "Target cannot be 'prod'" >&2
+    exit 1
+fi
+
+# Ask dbt for the schema of every model in the target as JSON. Failure
+# handlers use a brace group so `exit` ends this script itself.
+schemas_json=$(dbt --quiet list --resource-type model --target "$1" \
+    --output json --output-keys schema) || {
+    echo "Error in dbt call" >&2
+    exit 1
+}
+schemas=$(echo "$schemas_json" | sort | uniq | jq '.schema') || {
+    echo "Error in schema parsing" >&2
+    exit 1
+}
+
+echo "Deleting the following schemas from Athena:"
+echo
+echo "$schemas"
+
+# -I {} replaces the deprecated -i flag; exiting with 255 makes xargs stop
+# at the first failed deletion.
+echo "$schemas" | xargs -I {} bash -c 'aws glue delete-database --name {} || exit 255'
+
+echo
+echo "Done!"
diff --git a/.github/variables/dbt.env b/.github/variables/dbt.env
new file mode 100644
index 000000000..f142826fa
--- /dev/null
+++ b/.github/variables/dbt.env
@@ -0,0 +1,3 @@
+CACHE_NAME=dbt-cache
+MANIFEST_DIR=dbt/target
+PROJECT_DIR=dbt
diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml
new file mode 100644
index 000000000..68f529962
--- /dev/null
+++ b/.github/workflows/build_and_test_dbt.yaml
@@ -0,0 +1,85 @@
+name: build-and-test-dbt
+
+on:
+  pull_request:
+    branches: [master, data-catalog]
+  push:
+    branches: [master, data-catalog]
+
+jobs:
+  build-and-test-dbt:
+    runs-on: ubuntu-latest
+    # These permissions are needed to interact with GitHub's OIDC Token endpoint
+    # so that we can authenticate with AWS
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Install dbt requirements
+        uses: ./.github/actions/install_dbt_requirements
+
+      - name: Load environment variables
+        uses: ./.github/actions/load_environment_variables
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
+          aws-region: us-east-1
+
+      - name: Configure dbt environment
+        uses: ./.github/actions/configure_dbt_environment
+
+      - name: Cache dbt manifest
+        id: cache
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.MANIFEST_DIR }}
+          key: ${{ env.CACHE_NAME }}-${{ env.CACHE_KEY }}
+          restore-keys: |
+            ${{ env.CACHE_NAME }}-data-catalog
+            ${{ env.CACHE_NAME }}-master
+
+      # cache-hit is 'true' only when the exact key matched; a restore-keys
+      # fallback does not count as a hit, so those runs build and test everything.
+      - if: ${{ steps.cache.outputs.cache-hit == 'true' }}
+        name: Set command args to build/test modified resources
+        run: echo "MODIFIED_RESOURCES_ONLY=true" >> "$GITHUB_ENV"
+        shell: bash
+
+      - if: ${{ steps.cache.outputs.cache-hit != 'true' }}
+        name: Set command args to build/test all resources
+        run: echo "MODIFIED_RESOURCES_ONLY=false" >> "$GITHUB_ENV"
+        shell: bash
+
+      - name: Test dbt macros
+        run: dbt run-operation test_all
+        working-directory: ${{ env.PROJECT_DIR }}
+        shell: bash
+
+      - name: Build models
+        run: |
+          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
+            echo "Running build on modified resources only"
+            dbt run --target "$TARGET" -s state:modified --defer --state target/
+          else
+            echo "Running build on all resources"
+            dbt run --target "$TARGET"
+          fi
+        working-directory: ${{ env.PROJECT_DIR }}
+        shell: bash
+
+      - name: Test models
+        run: |
+          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
+            echo "Running tests on modified resources only"
+            dbt test --target "$TARGET" -s state:modified --state target/
+          else
+            echo "Running tests on all resources"
+            dbt test --target "$TARGET"
+          fi
+        working-directory: ${{ env.PROJECT_DIR }}
+        shell: bash
diff --git a/.github/workflows/cleanup_dbt_resources.yaml b/.github/workflows/cleanup_dbt_resources.yaml
new file mode 100644
index 000000000..2c785d11f
--- /dev/null
+++ b/.github/workflows/cleanup_dbt_resources.yaml
@@ -0,0 +1,42 @@
+name: cleanup-dbt-resources
+
+on:
+  pull_request:
+    branches: [master, data-catalog]
+    types: [closed]
+
+jobs:
+  cleanup-dbt-resources:
+    runs-on: ubuntu-latest
+    # These permissions are needed to interact with GitHub's OIDC Token endpoint
+    # so that we can authenticate with AWS
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Install dbt requirements
+        uses: ./.github/actions/install_dbt_requirements
+
+      - name: Install requirements for cleaning up dbt resources
+        run: sudo apt-get update && sudo apt-get install -y jq
+        shell: bash
+
+      - name: Load environment variables
+        uses: ./.github/actions/load_environment_variables
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
+          aws-region: us-east-1
+
+      - name: Configure dbt environment
+        uses: ./.github/actions/configure_dbt_environment
+
+      - name: Clean up dbt resources
+        run: ../.github/scripts/cleanup_dbt_resources.sh ci
+        working-directory: ${{ env.PROJECT_DIR }}
+        shell: bash
diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml
new file mode 100644
index 000000000..1e2a93a19
--- /dev/null
+++ b/.github/workflows/test_dbt_models.yaml
@@ -0,0 +1,42 @@
+name: test-dbt-models
+
+on: workflow_dispatch
+
+jobs:
+  test-dbt-models:
+    runs-on: ubuntu-latest
+    # These permissions are needed to interact with GitHub's OIDC Token endpoint
+    # so that we can authenticate with AWS
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Install dbt requirements
+        uses: ./.github/actions/install_dbt_requirements
+
+      - name: Load environment variables
+        uses: ./.github/actions/load_environment_variables
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
+          aws-region: us-east-1
+
+      - name: Configure dbt environment
+        uses: ./.github/actions/configure_dbt_environment
+
+      - name: Test models
+        # Target is currently set to CI because we expect this action to be
+        # run against the long-lived data-catalog branch, but we should change
+        # this to prod when we merge that branch into master
+        run: dbt test --target ci
+        working-directory: ${{ env.PROJECT_DIR }}
+        shell: bash
+        env:
+          # generate_schema_name reads HEAD_REF (not GITHUB_HEAD_REF) to build
+          # the CI schema prefix, so pin it to the data-catalog branch here
+          HEAD_REF: data-catalog
diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql
index e46df7ea1..53c261afd 100644
--- a/dbt/macros/generate_schema_name.sql
+++ b/dbt/macros/generate_schema_name.sql
@@ -29,7 +29,7 @@
     {%- if target.name == "dev" -%}
         {%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%}
     {%- elif target.name == "ci" -%}
-        {%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%}
+        {%- set github_head_ref = kebab_slugify(env_var_func("HEAD_REF")) -%}
         {%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%}
     {%- else -%} {%- set schema_prefix = "" -%}
     {%- endif -%}
diff --git a/dbt/macros/tests/test_generate_schema_name.sql b/dbt/macros/tests/test_generate_schema_name.sql
index 5116a26df..afc2de207 100644
--- a/dbt/macros/tests/test_generate_schema_name.sql
+++ b/dbt/macros/tests/test_generate_schema_name.sql
@@ -7,7 +7,7 @@
 
 {% macro mock_env_var(var_name) %}
     {% if var_name == "USER" %} {{ return("testuser") }}
-    {% elif var_name == "GITHUB_HEAD_REF" %} {{ return("testuser/feature-branch-1") }}
+    {% elif var_name == "HEAD_REF" %} {{ return("testuser/feature-branch-1") }}
     {% else %} {{ return("") }}
     {% endif %}
 {% endmacro %}
diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml
index f6feb7f06..7aa7d735f 100644
--- a/dbt/models/default/schema.yml
+++ b/dbt/models/default/schema.yml
@@ -31,7 +31,7 @@ models:
             - pin
             - year
           config:
-            error_if: ">280655"
+            error_if: ">280662"
       # Unique by case number and year
       - unique_combination_of_columns:
           name: vw_pin_appeal_unique_by_case_number_and_year
@@ -39,7 +39,7 @@ models:
             - year
             - case_no
           config:
-            error_if: ">365779"
+            error_if: ">365894"
       # `change` should be an enum
       - dbt_utils.expression_is_true:
           name: vw_pin_appeal_no_unexpected_change_values
@@ -85,7 +85,7 @@ models:
               case when char_renovation = '1' then true else false end
             )
           config:
-            error_if: ">73925"
+            error_if: ">73941"
       # TODO: Characteristics columns should adhere to pre-determined criteria
   - name: vw_pin_address_test
     description: '{{ doc("vw_pin_address_test") }}'
@@ -111,7 +111,7 @@ models:
             - mail_address_zipcode_1
             - mail_address_zipcode_2
           config:
-            error_if: ">879261"
+            error_if: ">880581"
       # TODO: Mailing address changes after validated sale(?)
       # TODO: Site addresses are all in Cook County
   - name: vw_pin_condo_char_test