From 06fda97c53fdc97344ecf024eaa499432d9943f6 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 09:11:33 -0500 Subject: [PATCH 01/33] Update release.yml --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 458bac69f4..32c53feab3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -83,7 +83,7 @@ jobs: name: python-package-distributions path: dist/ - name: Sign dist with Sigstore - uses: sigstore/gh-action-sigstore-python@v2.1.0 + uses: sigstore/gh-action-sigstore-python@v2 with: inputs: ./dist/*.tar.gz ./dist/*.whl - name: Upload artifact signatures to GitHub Release From 97a6089a84b4021347732966a6d0cbaefaf02957 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 15 Dec 2023 12:13:01 -0600 Subject: [PATCH 02/33] Tag commits of nightly builds and last stable release. --- .github/workflows/build-deploy-pudl.yml | 18 +++++++++++++----- .github/workflows/release.yml | 5 +++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index aae75483e5..717e23c2f8 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -26,6 +26,14 @@ jobs: if: ${{ (github.event_name == 'schedule') }} run: | echo "This action was triggered by a schedule." 
&& echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV && echo "GITHUB_REF=dev" >> $GITHUB_ENV + echo "RUN_DATE=$(date +%Y-%m-%d)" >> $GITHUB_ENV + + - name: Tag nightly build + if: ${{ (github.event_name == 'schedule') }} + uses: EndBug/latest-tag@1 + with: + ref: nightly-${{ env.RUN_DATE }} + description: "Nightly build for ${{ env.RUN_DATE }}" - name: Checkout Repository uses: actions/checkout@v4 @@ -45,7 +53,7 @@ jobs: - name: Docker Metadata id: docker_metadata - uses: docker/metadata-action@v5.3.0 + uses: docker/metadata-action@v5 with: images: catalystcoop/pudl-etl flavor: | @@ -55,17 +63,17 @@ jobs: type=ref,event=tag - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.0.0 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub if: github.event_name != 'pull_request' - uses: docker/login-action@v3.0.0 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build image and push to Docker Hub - uses: docker/build-push-action@v5.1.0 + uses: docker/build-push-action@v5 with: context: . 
file: docker/Dockerfile @@ -137,7 +145,7 @@ jobs: - name: Post to a pudl-deployments channel id: slack - uses: slackapi/slack-github-action@v1.24.0 + uses: slackapi/slack-github-action@v1 with: channel-id: "C03FHB9N0PQ" slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.GCS_OUTPUT_BUCKET }}/${{ env.RUN_TIMESTAMP}}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 32c53feab3..3547ce85e5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -95,6 +95,11 @@ jobs: --discussion-category Announcements --generate-notes --repo '${{ github.repository }}' + - name: Tag stable release + uses: EndBug/latest-tag@1 + with: + ref: stable + description: "The current stable release of catalystcoop.pudl" notify-slack: runs-on: ubuntu-latest From e83334e54c9438d15258069c2a979e76d9f357b7 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 16:19:53 -0600 Subject: [PATCH 03/33] Test pushing a tag in a GitHub action --- .github/workflows/pytest.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 0fb01335f8..d7a0ba4d60 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -104,6 +104,12 @@ jobs: name: coverage-unit path: coverage.xml + - name: Test pushing a tag + run: | + echo "RUN_DATE=$(date +%Y-%m-%d)" >> $GITHUB_ENV + git tag -a -m "Testy McTesterTag" test-${{ env.RUN_DATE }} + git push origin test-${{ env.RUN_DATE }} + ci-integration: runs-on: group: large-runner-group From 7cc0f18e319cee11f7a4466ea633c29ad01c2ac6 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 16:29:50 -0600 Subject: [PATCH 04/33] Add email and name to git user before pushing tag. 
--- .github/workflows/pytest.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d7a0ba4d60..ae2be7a111 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -107,6 +107,8 @@ jobs: - name: Test pushing a tag run: | echo "RUN_DATE=$(date +%Y-%m-%d)" >> $GITHUB_ENV + git config user.email "pudl@catalyst.coop" + git config user.name "PudlBot" git tag -a -m "Testy McTesterTag" test-${{ env.RUN_DATE }} git push origin test-${{ env.RUN_DATE }} From ebb594b3167bba9fa89cede328f89eb85a0a5ad4 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 16:50:26 -0600 Subject: [PATCH 05/33] Change envvar syntax to capture date in tag. --- .github/workflows/pytest.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ae2be7a111..52268d51dc 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -106,11 +106,11 @@ jobs: - name: Test pushing a tag run: | - echo "RUN_DATE=$(date +%Y-%m-%d)" >> $GITHUB_ENV + echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV git config user.email "pudl@catalyst.coop" git config user.name "PudlBot" - git tag -a -m "Testy McTesterTag" test-${{ env.RUN_DATE }} - git push origin test-${{ env.RUN_DATE }} + git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG + git push origin $NIGHTLY_TAG ci-integration: runs-on: From b0f218e5c784832487ac29f94696a35f4c461358 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 16:51:33 -0600 Subject: [PATCH 06/33] Push tag first, go faster. 
--- .github/workflows/pytest.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 52268d51dc..b430228df2 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -93,6 +93,14 @@ jobs: which sqlite3 sqlite3 --version + - name: Test pushing a tag + run: | + echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV + git config user.email "pudl@catalyst.coop" + git config user.name "PudlBot" + git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG + git push origin $NIGHTLY_TAG + - name: Run PUDL unit tests and collect test coverage run: | pip install --no-deps --editable . @@ -104,14 +112,6 @@ jobs: name: coverage-unit path: coverage.xml - - name: Test pushing a tag - run: | - echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV - git config user.email "pudl@catalyst.coop" - git config user.name "PudlBot" - git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG - git push origin $NIGHTLY_TAG - ci-integration: runs-on: group: large-runner-group From 9c597e9933dae0c608219b52116e96c90627dd04 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 16:56:16 -0600 Subject: [PATCH 07/33] Try to fix nightly tag workflow --- .github/workflows/pytest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b430228df2..9e3c767557 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -98,6 +98,7 @@ jobs: echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV git config user.email "pudl@catalyst.coop" git config user.name "PudlBot" + echo "nightly tag is: $NIGHTLY_TAG" git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG git push origin $NIGHTLY_TAG From 1171321fcf326fcaaaf290dc211007dcef9a89a9 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 17:09:13 -0600 Subject: [PATCH 08/33] Try to fix nightly tag workflow --- .github/workflows/pytest.yml | 9 +++++++-- 1 file 
changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9e3c767557..7220fb21fc 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -93,12 +93,17 @@ jobs: which sqlite3 sqlite3 --version - - name: Test pushing a tag + - name: Set the nightly tag run: | echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV + + - name: Test pushing a tag + run: | + echo "1 NIGHTLY_TAG: $NIGHTLY_TAG" + echo "2 NIGHTLY_TAG: ${{ env.NIGHTLY_TAG}}" + echo "3 NIGHTLY_TAG: " ${{ env.NIGHTLY_TAG}} git config user.email "pudl@catalyst.coop" git config user.name "PudlBot" - echo "nightly tag is: $NIGHTLY_TAG" git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG git push origin $NIGHTLY_TAG From f42e951f928e57d7cba8ca7c557f1aee23641669 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 17:20:06 -0600 Subject: [PATCH 09/33] Set up nightly tag in deploy workflow. --- .github/workflows/build-deploy-pudl.yml | 18 ++++++++++-------- .github/workflows/pytest.yml | 14 -------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index 717e23c2f8..c2e977224a 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -26,20 +26,22 @@ jobs: if: ${{ (github.event_name == 'schedule') }} run: | echo "This action was triggered by a schedule." 
&& echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV && echo "GITHUB_REF=dev" >> $GITHUB_ENV - echo "RUN_DATE=$(date +%Y-%m-%d)" >> $GITHUB_ENV - - - name: Tag nightly build - if: ${{ (github.event_name == 'schedule') }} - uses: EndBug/latest-tag@1 - with: - ref: nightly-${{ env.RUN_DATE }} - description: "Nightly build for ${{ env.RUN_DATE }}" + echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV + echo "NIGHTLY_TAG: $NIGHTLY_TAG" - name: Checkout Repository uses: actions/checkout@v4 with: ref: ${{ env.GITHUB_REF }} + - name: Tag nightly build + if: ${{ (github.event_name == 'schedule') }} + run: | + git config user.email "pudl@catalyst.coop" + git config user.name "PudlBot" + git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG + git push origin $NIGHTLY_TAG + - name: Get HEAD of the branch (main or dev) run: | echo "ACTION_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 7220fb21fc..0fb01335f8 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -93,20 +93,6 @@ jobs: which sqlite3 sqlite3 --version - - name: Set the nightly tag - run: | - echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV - - - name: Test pushing a tag - run: | - echo "1 NIGHTLY_TAG: $NIGHTLY_TAG" - echo "2 NIGHTLY_TAG: ${{ env.NIGHTLY_TAG}}" - echo "3 NIGHTLY_TAG: " ${{ env.NIGHTLY_TAG}} - git config user.email "pudl@catalyst.coop" - git config user.name "PudlBot" - git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG - git push origin $NIGHTLY_TAG - - name: Run PUDL unit tests and collect test coverage run: | pip install --no-deps --editable . From 736476e379ff5050e545bc8c6730b8c9f82a85b4 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 17:28:24 -0600 Subject: [PATCH 10/33] Use git commands to tag stable during release. 
--- .github/workflows/release.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3547ce85e5..f33bb21344 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -95,11 +95,12 @@ jobs: --discussion-category Announcements --generate-notes --repo '${{ github.repository }}' - - name: Tag stable release - uses: EndBug/latest-tag@1 - with: - ref: stable - description: "The current stable release of catalystcoop.pudl" + - name: Tag the current stable commit + run: | + git config user.email "pudl@catalyst.coop" + git config user.name "PudlBot" + git tag -a -m "stable" stable + git push origin stable notify-slack: runs-on: ubuntu-latest From afc25af214f2b9f60637802a0686b9d443f1fbf5 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 18:19:21 -0600 Subject: [PATCH 11/33] Purge old files from distribution path before uploading new files. --- docker/gcp_pudl_etl.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index 3958fa83ab..e64dc75538 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -68,9 +68,13 @@ function copy_outputs_to_gcs() { } function copy_outputs_to_distribution_bucket() { + echo "Removing old outputs from GCP distributon bucket." + gsutil -m -u $GCP_BILLING_PROJECT rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" echo "Copying outputs to GCP distribution bucket" gsutil -m -u $GCP_BILLING_PROJECT cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" + echo "Removing old outputs from AWS distributon bucket." 
+ aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive echo "Copying outputs to AWS distribution bucket" aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive } From 273d1732d80d5f328a5ee02249214180ec3173c2 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 18:48:53 -0600 Subject: [PATCH 12/33] Apply nightly-YYYY-MM-DD to GITHUB_REF not main --- .github/workflows/build-deploy-pudl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index c2e977224a..e7ec9991ac 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -39,7 +39,7 @@ jobs: run: | git config user.email "pudl@catalyst.coop" git config user.name "PudlBot" - git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG + git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG $GITHUB_REF git push origin $NIGHTLY_TAG - name: Get HEAD of the branch (main or dev) From fa0c04471bc3813f7c7be15582867ff8421948c8 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 18 Dec 2023 19:16:44 -0600 Subject: [PATCH 13/33] Try to tag nightly from within docker container --- .github/workflows/build-deploy-pudl.yml | 1 + docker/gcp_pudl_etl.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index e7ec9991ac..f7370c417c 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -137,6 +137,7 @@ jobs: --container-env DAGSTER_PG_HOST="104.154.182.24" \ --container-env DAGSTER_PG_DB="dagster-storage" \ --container-env FLY_ACCESS_TOKEN=${{ secrets.FLY_ACCESS_TOKEN }} \ + --container-env PUDL_BOT_PAT=${{ secrets.PUDL_BOT_PAT }} \ --container-env ZENODO_SANDBOX_TOKEN_PUBLISH=${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} \ --container-env PUDL_SETTINGS_YML="/home/mambauser/src/pudl/package_data/settings/etl_full.yml" \ --container-env 
PUDL_GCS_OUTPUT=${{ env.PUDL_OUTPUT_PATH }} diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index e64dc75538..c5ef33207f 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -111,6 +111,12 @@ copy_outputs_to_gcs # if pipeline is successful, distribute + publish datasette if [[ $ETL_SUCCESS == 0 ]]; then + if [ $GITHUB_ACTION_TRIGGER = "schedule" ]; then + # Tag the nightly build + git config user.email "pudl@catalyst.coop" + git config user.name "PudlBot" + git tag -a -m "The most recent successful nightly build." nightly $GITHUB_REF + git push origin nightly # Deploy the updated data to datasette if [ $GITHUB_REF = "dev" ]; then python ~/devtools/datasette/publish.py 2>&1 | tee -a $LOGFILE From fe5e2245c747ef4b68dec88dbb9879fbb93959ab Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 18:40:15 -0600 Subject: [PATCH 14/33] Make nightly & stable branches. Safer removal of old deployments. Release notes config. --- .github/release.yml | 17 +++++++++++++++++ .github/workflows/release.yml | 7 ++++--- docker/gcp_pudl_etl.sh | 30 ++++++++++++++++-------------- 3 files changed, 37 insertions(+), 17 deletions(-) create mode 100644 .github/release.yml diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000000..e999750de7 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,17 @@ +--- +changelog: + exclude: + authors: + - dependabot + - pre-commit-ci + - pudlbot + labels: + - conda-lock + - dependencies + categories: + - title: New Data + labels: + - new-data + - title: Other Changes + labels: + - "*" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f33bb21344..2b212e6a00 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -98,9 +98,10 @@ jobs: - name: Tag the current stable commit run: | git config user.email "pudl@catalyst.coop" - git config user.name "PudlBot" - git tag -a -m "stable" stable - git push origin stable + git config 
user.name "pudlbot" + git checkout stable + git merge --ff-only ${{ github.ref_name }} + git push notify-slack: runs-on: ubuntu-latest diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index c5ef33207f..df4b587b64 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -68,15 +68,17 @@ function copy_outputs_to_gcs() { } function copy_outputs_to_distribution_bucket() { - echo "Removing old outputs from GCP distributon bucket." - gsutil -m -u $GCP_BILLING_PROJECT rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" - echo "Copying outputs to GCP distribution bucket" - gsutil -m -u $GCP_BILLING_PROJECT cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" - - echo "Removing old outputs from AWS distributon bucket." - aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive - echo "Copying outputs to AWS distribution bucket" - aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive + # Only attempt to update outputs if we have a real value of GITHUB_REF + if [ -n "$GITHUB_REF" ]; then + echo "Removing old $GITHUB_REF outputs from GCP distributon bucket." + gsutil -m -u $GCP_BILLING_PROJECT rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" + echo "Copying outputs to GCP distribution bucket" + gsutil -m -u $GCP_BILLING_PROJECT cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" + + echo "Removing old $GITHUB_REF outputs from AWS distributon bucket." 
+ aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive + echo "Copying outputs to AWS distribution bucket" + aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive } function zenodo_data_release() { @@ -84,7 +86,6 @@ function zenodo_data_release() { ~/devtools/zenodo/zenodo_data_release.py --publish --env sandbox --source-dir $PUDL_OUTPUT } - function notify_slack() { # Notify pudl-builds slack channel of deployment status if [ $1 = "success" ]; then @@ -112,11 +113,12 @@ copy_outputs_to_gcs # if pipeline is successful, distribute + publish datasette if [[ $ETL_SUCCESS == 0 ]]; then if [ $GITHUB_ACTION_TRIGGER = "schedule" ]; then - # Tag the nightly build + # Update the nightly branch to point at newly successful nightly build tag git config user.email "pudl@catalyst.coop" - git config user.name "PudlBot" - git tag -a -m "The most recent successful nightly build." nightly $GITHUB_REF - git push origin nightly + git config user.name "pudlbot" + git checkout nightly + git merge --ff-only $NIGHTLY_TAG + git push # Deploy the updated data to datasette if [ $GITHUB_REF = "dev" ]; then python ~/devtools/datasette/publish.py 2>&1 | tee -a $LOGFILE From b68bb63d7f35e08138e0d95dd2d2561a349bd35f Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 18:46:12 -0600 Subject: [PATCH 15/33] Copy repo contents into container before building conda env. --- docker/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 26b291f4f2..4ca3ac67fe 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -35,12 +35,11 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} # Copy dagster configuration file COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml +# Copy the cloned pudl repository into the user's home directory +COPY --chown=${MAMBA_USER}:${MAMBA_USER} . 
${CONTAINER_HOME} # Create a conda environment based on the specification in the repo -COPY environments/conda-lock.yml environments/conda-lock.yml RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ micromamba clean -afy -# Copy the cloned pudl repository into the user's home directory -COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} # TODO(rousik): The following is a workaround for sudden breakage where conda # can't find libraries contained within the environment. It's unclear why! From eef0c6234181768790df54008d2a082f24c3562f Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:18:25 -0600 Subject: [PATCH 16/33] Attempt to clone repo inside Docker container --- .github/workflows/build-deploy-pudl.yml | 1 + .github/workflows/docker-build-test.yml | 8 +++++--- docker/Dockerfile | 11 +++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index f7370c417c..db279f040d 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -124,6 +124,7 @@ jobs: --container-env-file="./docker/.env" \ --container-env ACTION_SHA=$ACTION_SHA \ --container-env GITHUB_REF=${{ env.GITHUB_REF }} \ + --container-env NIGHTLY_TAG=${{ env.NIGHTLY_TAG }} \ --container-env GITHUB_ACTION_TRIGGER=${{ github.event_name }} \ --container-env SLACK_TOKEN=${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} \ --container-env GCE_INSTANCE=${{ env.GCE_INSTANCE }} \ diff --git a/.github/workflows/docker-build-test.yml b/.github/workflows/docker-build-test.yml index 6cc14b8d58..e09c54670d 100644 --- a/.github/workflows/docker-build-test.yml +++ b/.github/workflows/docker-build-test.yml @@ -17,20 +17,22 @@ jobs: - name: Docker Metadata id: docker_metadata - uses: docker/metadata-action@v5.3.0 + uses: docker/metadata-action@v5 with: images: catalystcoop/pudl-etl flavor: | latest=auto - name: Set up Docker 
Buildx - uses: docker/setup-buildx-action@v3.0.0 + uses: docker/setup-buildx-action@v3 - name: Build image but do not push to Docker Hub - uses: docker/build-push-action@v5.1.0 + uses: docker/build-push-action@v5 with: context: . file: docker/Dockerfile push: false cache-from: type=gha cache-to: type=gha,mode=max + secrets: | + "GITHUB_REF=${{ github.ref_name }}" diff --git a/docker/Dockerfile b/docker/Dockerfile index 4ca3ac67fe..6a04523129 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -36,7 +36,9 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory -COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} +# COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} +RUN "git clone --depth 2 --single-branch ${GITHUB_REF} https://github.com/catalyst-cooperative/pudl.git" +WORKDIR ${PUDL_REPO} # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ micromamba clean -afy @@ -46,11 +48,12 @@ RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-l ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib # We need information from .git to get version with setuptools_scm so we mount that # directory without copying it into the image. -RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \ - ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . +#RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \ +RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . 
# Install awscli2 # Change back to root because the install script needs access to /usr/local/aws-cli +WORKDIR ${CONTAINER_HOME} USER root RUN ${CONDA_RUN} bash -c 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install' USER $MAMBA_USER @@ -60,6 +63,6 @@ USER $MAMBA_USER RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" - +WORKDIR ${PUDL_REPO} # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] From c0f1be7ea9e129c89be66487501a5feb1f2df406 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:23:30 -0600 Subject: [PATCH 17/33] Attempt to clone repo inside Docker container --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 6a04523129..fef297b49a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -37,7 +37,7 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory # COPY --chown=${MAMBA_USER}:${MAMBA_USER} . 
${CONTAINER_HOME} -RUN "git clone --depth 2 --single-branch ${GITHUB_REF} https://github.com/catalyst-cooperative/pudl.git" +RUN git clone --depth 2 --single-branch "$GITHUB_REF" https://github.com/catalyst-cooperative/pudl.git WORKDIR ${PUDL_REPO} # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ From bf02f01c858d867b725850efb25777c5f201317e Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:28:59 -0600 Subject: [PATCH 18/33] Attempt to clone repo inside Docker container --- docker/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index fef297b49a..e896883e04 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -37,7 +37,8 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory # COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} -RUN git clone --depth 2 --single-branch "$GITHUB_REF" https://github.com/catalyst-cooperative/pudl.git +RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git && \ + git checkout "$GITHUB_REF" WORKDIR ${PUDL_REPO} # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ From f2255326cb2c7dfeb79812cc4220bc27b079f02f Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:31:10 -0600 Subject: [PATCH 19/33] Attempt to clone repo inside Docker container --- docker/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index e896883e04..d9ade02b23 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -38,8 +38,9 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory # COPY 
--chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git && \ - git checkout "$GITHUB_REF" WORKDIR ${PUDL_REPO} +RUN git checkout "$GITHUB_REF" + # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ micromamba clean -afy From 8892f39d113d8e557a139641a250ce7f56efe354 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:32:33 -0600 Subject: [PATCH 20/33] Attempt to clone repo inside Docker container --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d9ade02b23..dcd785e6d1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -37,7 +37,7 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory # COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} -RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git && \ +RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git WORKDIR ${PUDL_REPO} RUN git checkout "$GITHUB_REF" From 85e944938b8152b421c18db3bca1e93ebb029546 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 19:35:24 -0600 Subject: [PATCH 21/33] Attempt to clone repo inside Docker container --- docker/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index dcd785e6d1..6f47ca836f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -39,6 +39,8 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # COPY --chown=${MAMBA_USER}:${MAMBA_USER} . 
${CONTAINER_HOME} RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git WORKDIR ${PUDL_REPO} +RUN env +ENV GITHUB_REF=${GITHUB_REF} RUN git checkout "$GITHUB_REF" # Create a conda environment based on the specification in the repo From 16445d462376e38729446b7cf58b0d52a635d688 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 20:46:16 -0600 Subject: [PATCH 22/33] Revert to copying repo into container --- .github/workflows/docker-build-test.yml | 2 -- docker/Dockerfile | 11 ++--------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.github/workflows/docker-build-test.yml b/.github/workflows/docker-build-test.yml index e09c54670d..fb87ee22e7 100644 --- a/.github/workflows/docker-build-test.yml +++ b/.github/workflows/docker-build-test.yml @@ -34,5 +34,3 @@ jobs: push: false cache-from: type=gha cache-to: type=gha,mode=max - secrets: | - "GITHUB_REF=${{ github.ref_name }}" diff --git a/docker/Dockerfile b/docker/Dockerfile index 6f47ca836f..bde0236939 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:1.5.3 +FROM mambaorg/micromamba:1.5.5 USER root @@ -36,12 +36,7 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory -# COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} -RUN git clone --depth 2 https://github.com/catalyst-cooperative/pudl.git -WORKDIR ${PUDL_REPO} -RUN env -ENV GITHUB_REF=${GITHUB_REF} -RUN git checkout "$GITHUB_REF" +COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ @@ -57,7 +52,6 @@ RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . 
# Install awscli2 # Change back to root because the install script needs access to /usr/local/aws-cli -WORKDIR ${CONTAINER_HOME} USER root RUN ${CONDA_RUN} bash -c 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install' USER $MAMBA_USER @@ -67,6 +61,5 @@ USER $MAMBA_USER RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" -WORKDIR ${PUDL_REPO} # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] From ddfb861f92e4ca42302f9f6c2c5c9942a6d59047 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 20:56:14 -0600 Subject: [PATCH 23/33] Move pudl repo into the pudl/ directory --- docker/Dockerfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index bde0236939..f73c0ef4eb 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -36,19 +36,17 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml # Copy the cloned pudl repository into the user's home directory -COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME} +COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${PUDL_REPO} # Create a conda environment based on the specification in the repo -RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \ +RUN ls -a && \ + micromamba create --prefix ${CONDA_PREFIX} --yes --file ${PUDL_REPO}/environments/conda-lock.yml && \ micromamba clean -afy # TODO(rousik): The following is a workaround for sudden breakage where conda # can't find libraries contained within the environment. It's unclear why! ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib -# We need information from .git to get version with setuptools_scm so we mount that -# directory without copying it into the image. 
-#RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \ -RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . +RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable ${PUDL_REPO} # Install awscli2 # Change back to root because the install script needs access to /usr/local/aws-cli @@ -61,5 +59,6 @@ USER $MAMBA_USER RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" +WORKDIR ${PUDL_REPO} # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] From 9f98de698d8c51a8a60ae75001ab0222c3287cf1 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 21:04:57 -0600 Subject: [PATCH 24/33] Stop using conda run when not required. --- docker/Dockerfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index f73c0ef4eb..565225c021 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -39,7 +39,7 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml COPY --chown=${MAMBA_USER}:${MAMBA_USER} . 
${PUDL_REPO} # Create a conda environment based on the specification in the repo -RUN ls -a && \ +RUN ls -a pudl pudl_work && \ micromamba create --prefix ${CONDA_PREFIX} --yes --file ${PUDL_REPO}/environments/conda-lock.yml && \ micromamba clean -afy @@ -51,14 +51,17 @@ RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable ${PUDL_REPO} # Install awscli2 # Change back to root because the install script needs access to /usr/local/aws-cli USER root -RUN ${CONDA_RUN} bash -c 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install' +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ + unzip awscliv2.zip && \ + ./aws/install USER $MAMBA_USER # Install flyctl # hadolint ignore=DL3059 -RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' +RUN curl -L https://fly.io/install.sh | sh ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" WORKDIR ${PUDL_REPO} +RUN git config -l # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] From b3b3f524afa3e7970f390265a1e2c362fc6ee185 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 21:13:25 -0600 Subject: [PATCH 25/33] Try and do a git thing --- docker/Dockerfile | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 565225c021..5dbdf17ce4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -30,7 +30,7 @@ ENV PUDL_INPUT=${CONTAINER_PUDL_WORKSPACE}/input ENV PUDL_OUTPUT=${CONTAINER_PUDL_WORKSPACE}/output ENV DAGSTER_HOME=${CONTAINER_PUDL_WORKSPACE}/dagster_home -RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} +RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} ${PUDL_REPO} # Copy dagster configuration file COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml @@ -39,8 +39,7 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml COPY 
--chown=${MAMBA_USER}:${MAMBA_USER} . ${PUDL_REPO} # Create a conda environment based on the specification in the repo -RUN ls -a pudl pudl_work && \ - micromamba create --prefix ${CONDA_PREFIX} --yes --file ${PUDL_REPO}/environments/conda-lock.yml && \ +RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file ${PUDL_REPO}/environments/conda-lock.yml && \ micromamba clean -afy # TODO(rousik): The following is a workaround for sudden breakage where conda @@ -50,15 +49,14 @@ RUN ${CONDA_RUN} pip install --no-cache-dir --no-deps --editable ${PUDL_REPO} # Install awscli2 # Change back to root because the install script needs access to /usr/local/aws-cli +# curl commands run within conda environment because curl is installed by conda. USER root -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ - unzip awscliv2.zip && \ - ./aws/install +RUN ${CONDA_RUN} bash -c 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install' USER $MAMBA_USER # Install flyctl # hadolint ignore=DL3059 -RUN curl -L https://fly.io/install.sh | sh +RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" WORKDIR ${PUDL_REPO} From f989635e7da2c290fd8e29ffd0cfe6f597bb86dd Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 21:59:36 -0600 Subject: [PATCH 26/33] Test git authentication with workflow_dispatch --- docker/Dockerfile | 4 ++-- docker/gcp_pudl_etl.sh | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 5dbdf17ce4..2682b18a58 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -35,7 +35,8 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} ${PUDL_REPO} # Copy dagster configuration file COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml -# Copy the cloned pudl repository into the user's home directory +# Copy the cloned pudl 
repository into the container +# This includes the .git directory, so it is a whole repo COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${PUDL_REPO} # Create a conda environment based on the specification in the repo @@ -60,6 +61,5 @@ RUN ${CONDA_RUN} bash -c 'curl -L https://fly.io/install.sh | sh' ENV PATH="${CONTAINER_HOME}/.fly/bin:$PATH" WORKDIR ${PUDL_REPO} -RUN git config -l # Run the unit tests: CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"] diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index df4b587b64..6d5aabcd5e 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -48,16 +48,12 @@ function run_pudl_etl() { } function shutdown_vm() { - # Copy the outputs to the GCS bucket - upload_file_to_slack $LOGFILE "pudl_etl logs for $ACTION_SHA-$GITHUB_REF:" - + # Shut down the vm instance when the etl is done. echo "Shutting down VM." - # # Shut down the vm instance when the etl is done. ACCESS_TOKEN=`curl \ "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \ -H "Metadata-Flavor: Google" | jq -r '.access_token'` - curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop } @@ -102,6 +98,14 @@ function notify_slack() { send_slack_msg "$message" } +if [ $GITHUB_ACTION_TRIGGER = "workflow_dispatch" ]; then + git config user.email "pudl@catalyst.coop" + git config user.name "pudlbot" + git remote set-url origin https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git + git tag -a -m "Nightly test tag" nightly-tag-test + git push origin nightly-tag-test + shutdown_vm + # # Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails # 2>&1 redirects stderr to stdout. 
run_pudl_etl 2>&1 | tee $LOGFILE @@ -116,6 +120,7 @@ if [[ $ETL_SUCCESS == 0 ]]; then # Update the nightly branch to point at newly successful nightly build tag git config user.email "pudl@catalyst.coop" git config user.name "pudlbot" + git remote set-url origin https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git git checkout nightly git merge --ff-only $NIGHTLY_TAG git push From 5f1228ba489c377b69c88f86da061c028c3f165b Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 23:02:37 -0600 Subject: [PATCH 27/33] Add some logging to git authentication check --- docker/gcp_pudl_etl.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index 6d5aabcd5e..dcfe25fa22 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -51,9 +51,9 @@ function shutdown_vm() { upload_file_to_slack $LOGFILE "pudl_etl logs for $ACTION_SHA-$GITHUB_REF:" # Shut down the vm instance when the etl is done. echo "Shutting down VM." 
- ACCESS_TOKEN=`curl \ + ACCESS_TOKEN=$(curl \ "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \ - -H "Metadata-Flavor: Google" | jq -r '.access_token'` + -H "Metadata-Flavor: Google" | jq -r '.access_token') curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop } @@ -75,6 +75,7 @@ function copy_outputs_to_distribution_bucket() { aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive echo "Copying outputs to AWS distribution bucket" aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive + fi } function zenodo_data_release() { @@ -98,13 +99,16 @@ function notify_slack() { send_slack_msg "$message" } -if [ $GITHUB_ACTION_TRIGGER = "workflow_dispatch" ]; then +if [ "$GITHUB_ACTION_TRIGGER" = "workflow_dispatch" ]; then + echo "Deployed via workflow_dispatch, testing git authentication!" git config user.email "pudl@catalyst.coop" git config user.name "pudlbot" - git remote set-url origin https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git + git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" + git config -l git tag -a -m "Nightly test tag" nightly-tag-test git push origin nightly-tag-test shutdown_vm +fi # # Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails # 2>&1 redirects stderr to stdout. 
@@ -124,6 +128,7 @@ if [[ $ETL_SUCCESS == 0 ]]; then git checkout nightly git merge --ff-only $NIGHTLY_TAG git push + fi # Deploy the updated data to datasette if [ $GITHUB_REF = "dev" ]; then python ~/devtools/datasette/publish.py 2>&1 | tee -a $LOGFILE From b45794263e00adad34c7748d79ed1fcc2be031d4 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 19 Dec 2023 23:44:41 -0600 Subject: [PATCH 28/33] Update settings file path to point at repo in container --- .github/workflows/build-deploy-pudl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index db279f040d..94b21f7904 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -140,7 +140,7 @@ jobs: --container-env FLY_ACCESS_TOKEN=${{ secrets.FLY_ACCESS_TOKEN }} \ --container-env PUDL_BOT_PAT=${{ secrets.PUDL_BOT_PAT }} \ --container-env ZENODO_SANDBOX_TOKEN_PUBLISH=${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} \ - --container-env PUDL_SETTINGS_YML="/home/mambauser/src/pudl/package_data/settings/etl_full.yml" \ + --container-env PUDL_SETTINGS_YML="/home/mambauser/pudl/src/pudl/package_data/settings/etl_full.yml" \ --container-env PUDL_GCS_OUTPUT=${{ env.PUDL_OUTPUT_PATH }} # Start the VM From 03f6e19d4f33d2c45f45dcb496a9b216fb76fa25 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Wed, 20 Dec 2023 12:55:48 -0600 Subject: [PATCH 29/33] unset read-only git config --- docker/gcp_pudl_etl.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index dcfe25fa22..e1b3b6e5c3 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -101,6 +101,8 @@ function notify_slack() { if [ "$GITHUB_ACTION_TRIGGER" = "workflow_dispatch" ]; then echo "Deployed via workflow_dispatch, testing git authentication!" 
+ # Remove the read-only authentication header + git config --unset http.https://github.com/.extraheader git config user.email "pudl@catalyst.coop" git config user.name "pudlbot" git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" From f409d8df4c2bd8fec1dc069787cc5d70e324fd38 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Wed, 20 Dec 2023 13:39:14 -0600 Subject: [PATCH 30/33] Consolidate envvars and set explicit BUILD_ID --- .github/workflows/build-deploy-pudl.yml | 42 +++++++++++-------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index 94b21f7904..a4a242874f 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -25,34 +25,36 @@ jobs: - name: Use pudl-deployment-dev vm and dev branch if running on a schedule if: ${{ (github.event_name == 'schedule') }} run: | - echo "This action was triggered by a schedule." && echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV && echo "GITHUB_REF=dev" >> $GITHUB_ENV - echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV - echo "NIGHTLY_TAG: $NIGHTLY_TAG" + echo "This action was triggered by a schedule." 
+ echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV + echo "GCE_INSTANCE: $GCE_INSTANCE" + echo "GITHUB_REF=dev" >> $GITHUB_ENV + echo "GITHUB_REF: $GITHUB_REF" - name: Checkout Repository uses: actions/checkout@v4 with: ref: ${{ env.GITHUB_REF }} + - name: Set action environment variables + run: | + echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV + echo "NIGHTLY_TAG: $NIGHTLY_TAG" + echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + echo "SHORT_SHA: $SHORT_SHA" + echo "BUILD_TIMESTAMP=$(date +%Y-%m-%d-%H%M)" >> $GITHUB_ENV + echo "BUILD_TIMESTAMP: $BUILD_TIMESTAMP" + echo "BUILD_ID=$(date +%Y-%m-%d-%H%M)-$(git rev-parse --short HEAD)-${GITHUB_REF}" >> $GITHUB_ENV + echo "BUILD_ID: $BUILD_ID" + - name: Tag nightly build if: ${{ (github.event_name == 'schedule') }} run: | git config user.email "pudl@catalyst.coop" - git config user.name "PudlBot" + git config user.name "pudlbot" git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG $GITHUB_REF git push origin $NIGHTLY_TAG - - name: Get HEAD of the branch (main or dev) - run: | - echo "ACTION_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV - echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV - - - name: Print action vars - run: | - echo "ACTION_SHA: $ACTION_SHA" && \ - echo "GITHUB_REF: $GITHUB_REF" && \ - echo "GCE_INSTANCE: $GCE_INSTANCE" - - name: Docker Metadata id: docker_metadata uses: docker/metadata-action@v5 @@ -95,17 +97,11 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2 - - name: Determine commit information - run: |- - echo "COMMIT_BRANCH=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV - echo "COMMIT_TIME=$(git log -1 --format=%cd --date=format:%Y-%m-%d-%H%M)" >> $GITHUB_ENV - echo "RUN_TIMESTAMP=$(date +%Y-%m-%d-%H%M)" >> $GITHUB_ENV - # Deploy PUDL image to GCE - name: Deploy env: DAGSTER_PG_PASSWORD: ${{ secrets.DAGSTER_PG_PASSWORD }} - PUDL_OUTPUT_PATH: ${{ env.GCS_OUTPUT_BUCKET }}/${{ env.RUN_TIMESTAMP }}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }} + PUDL_OUTPUT_PATH: ${{
env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }} run: |- gcloud compute instances add-metadata "$GCE_INSTANCE" \ --zone "$GCE_INSTANCE_ZONE" \ @@ -152,7 +148,7 @@ jobs: uses: slackapi/slack-github-action@v1 with: channel-id: "C03FHB9N0PQ" - slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.GCS_OUTPUT_BUCKET }}/${{ env.RUN_TIMESTAMP}}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }}" + slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}" env: channel-id: "C03FHB9N0PQ" SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} From ac31f1ec0ae1dbc5976581be6ea3cc5162df90ec Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Wed, 20 Dec 2023 13:43:55 -0600 Subject: [PATCH 31/33] Clean up GCP PUDL ETL script - Remove ad-hoc test of nightly build tagging - Use "$DOUBLE_QUOTES" around envvars to prevent globbing and word splitting - Use $BUILD_ID consistently when referring to nightly build outputs. - Update name of epacems directory that we remove before deployment. --- docker/gcp_pudl_etl.sh | 79 ++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index e1b3b6e5c3..1f0d0c77ac 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -3,7 +3,7 @@ # This script won't work locally because it needs adequate GCP permissions.
# Set PUDL_GCS_OUTPUT *only* if it is currently unset -: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$ACTION_SHA-$GITHUB_REF}" +: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}" set -x @@ -12,64 +12,64 @@ function send_slack_msg() { } function upload_file_to_slack() { - curl -F file=@$1 -F "initial_comment=$2" -F channels=C03FHB9N0PQ -H "Authorization: Bearer ${SLACK_TOKEN}" https://slack.com/api/files.upload + curl -F "file=@$1" -F "initial_comment=$2" -F channels=C03FHB9N0PQ -H "Authorization: Bearer ${SLACK_TOKEN}" https://slack.com/api/files.upload } function authenticate_gcp() { # Set the default gcloud project id so the zenodo-cache bucket # knows what project to bill for egress - gcloud config set project $GCP_BILLING_PROJECT + gcloud config set project "$GCP_BILLING_PROJECT" } function run_pudl_etl() { - send_slack_msg ":large_yellow_circle: Deployment started for $ACTION_SHA-$GITHUB_REF :floppy_disk:" + send_slack_msg ":large_yellow_circle: Deployment started for $BUILD_ID :floppy_disk:" authenticate_gcp && \ alembic upgrade head && \ ferc_to_sqlite \ --loglevel DEBUG \ --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ --workers 8 \ - $PUDL_SETTINGS_YML \ + "$PUDL_SETTINGS_YML" \ && pudl_etl \ --loglevel DEBUG \ --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ - $PUDL_SETTINGS_YML \ + "$PUDL_SETTINGS_YML" \ && pytest \ -n auto \ --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ - --etl-settings $PUDL_SETTINGS_YML \ + --etl-settings "$PUDL_SETTINGS_YML" \ --live-dbs test/integration test/unit \ && pytest \ -n auto \ --gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ - --etl-settings $PUDL_SETTINGS_YML \ + --etl-settings "$PUDL_SETTINGS_YML" \ --live-dbs test/validate \ - && touch ${PUDL_OUTPUT}/success + && touch "$PUDL_OUTPUT/success" } function shutdown_vm() { - upload_file_to_slack $LOGFILE "pudl_etl logs for $ACTION_SHA-$GITHUB_REF:" + upload_file_to_slack "$LOGFILE" 
"pudl_etl logs for $BUILD_ID:" # Shut down the vm instance when the etl is done. echo "Shutting down VM." ACCESS_TOKEN=$(curl \ "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \ -H "Metadata-Flavor: Google" | jq -r '.access_token') - curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop + curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" "https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop" } function copy_outputs_to_gcs() { echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT" - gsutil -m cp -r $PUDL_OUTPUT ${PUDL_GCS_OUTPUT} - rm ${PUDL_OUTPUT}/success + gsutil -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" + rm "$PUDL_OUTPUT/success" } function copy_outputs_to_distribution_bucket() { # Only attempt to update outputs if we have a real value of GITHUB_REF if [ -n "$GITHUB_REF" ]; then echo "Removing old $GITHUB_REF outputs from GCP distributon bucket." - gsutil -m -u $GCP_BILLING_PROJECT rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" + gsutil -m -u "$GCP_BILLING_PROJECT" rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" echo "Copying outputs to GCP distribution bucket" - gsutil -m -u $GCP_BILLING_PROJECT cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" + gsutil -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" echo "Removing old $GITHUB_REF outputs from AWS distributon bucket." aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive @@ -80,41 +80,28 @@ function copy_outputs_to_distribution_bucket() { function zenodo_data_release() { echo "Creating a new PUDL data release on Zenodo." 
- ~/devtools/zenodo/zenodo_data_release.py --publish --env sandbox --source-dir $PUDL_OUTPUT + ~/devtools/zenodo/zenodo_data_release.py --publish --env sandbox --source-dir "$PUDL_OUTPUT" } function notify_slack() { # Notify pudl-builds slack channel of deployment status - if [ $1 = "success" ]; then + if [ "$1" = "success" ]; then message=":large_green_circle: :sunglasses: :unicorn_face: :rainbow: The deployment succeeded!! :partygritty: :database_parrot: :blob-dance: :large_green_circle:\n\n " message+="\n\n" - elif [ $1 = "failure" ]; then + elif [ "$1" = "failure" ]; then message=":large_red_square: Oh bummer the deployment failed ::fiiiiine: :sob: :cry_spin:\n\n " else echo "Invalid deployment status" exit 1 fi - message+="See https://console.cloud.google.com/storage/browser/nightly-build-outputs.catalyst.coop/$ACTION_SHA-$GITHUB_REF for logs and outputs." + message+="See https://console.cloud.google.com/storage/browser/nightly-build-outputs.catalyst.coop/$BUILD_ID for logs and outputs." send_slack_msg "$message" } -if [ "$GITHUB_ACTION_TRIGGER" = "workflow_dispatch" ]; then - echo "Deployed via workflow_dispatch, testing git authentication!" - # Remove the read-only authentication header - git config --unset http.https://github.com/.extraheader - git config user.email "pudl@catalyst.coop" - git config user.name "pudlbot" - git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" - git config -l - git tag -a -m "Nightly test tag" nightly-tag-test - git push origin nightly-tag-test - shutdown_vm -fi - # # Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails # 2>&1 redirects stderr to stdout. 
-run_pudl_etl 2>&1 | tee $LOGFILE +run_pudl_etl 2>&1 | tee "$LOGFILE" ETL_SUCCESS=${PIPESTATUS[0]} @@ -122,42 +109,44 @@ copy_outputs_to_gcs # if pipeline is successful, distribute + publish datasette if [[ $ETL_SUCCESS == 0 ]]; then - if [ $GITHUB_ACTION_TRIGGER = "schedule" ]; then - # Update the nightly branch to point at newly successful nightly build tag + if [ "$GITHUB_ACTION_TRIGGER" = "schedule" ]; then + # Remove read-only authentication header added by git checkout + git config --unset http.https://github.com/.extraheader git config user.email "pudl@catalyst.coop" git config user.name "pudlbot" - git remote set-url origin https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git + git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" + # Update the nightly branch to point at newly successful nightly build tag git checkout nightly - git merge --ff-only $NIGHTLY_TAG + git merge --ff-only "$NIGHTLY_TAG" git push fi # Deploy the updated data to datasette - if [ $GITHUB_REF = "dev" ]; then - python ~/devtools/datasette/publish.py 2>&1 | tee -a $LOGFILE + if [ "$GITHUB_REF" = "dev" ]; then + python ~/devtools/datasette/publish.py 2>&1 | tee -a "$LOGFILE" ETL_SUCCESS=${PIPESTATUS[0]} fi # Compress the SQLite DBs for easier distribution # Remove redundant multi-file EPA CEMS outputs prior to distribution - gzip --verbose $PUDL_OUTPUT/*.sqlite && \ - rm -rf $PUDL_OUTPUT/hourly_emissions_epacems/ && \ - rm -f $PUDL_OUTPUT/metadata.yml + gzip --verbose "$PUDL_OUTPUT"/*.sqlite && \ + rm -rf "$PUDL_OUTPUT/core_epacems__hourly_emissions/" && \ + rm -f "$PUDL_OUTPUT/metadata.yml" ETL_SUCCESS=${PIPESTATUS[0]} # Dump outputs to s3 bucket if branch is dev or build was triggered by a tag # TODO: this behavior should be controlled by on/off switch here and this logic # should be moved to the triggering github action. Having it here feels # fragmented. 
- if [ $GITHUB_ACTION_TRIGGER = "push" ] || [ $GITHUB_REF = "dev" ]; then + if [ "$GITHUB_ACTION_TRIGGER" = "push" ] || [ "$GITHUB_REF" = "dev" ]; then copy_outputs_to_distribution_bucket ETL_SUCCESS=${PIPESTATUS[0]} - zenodo_data_release 2>&1 | tee -a $LOGFILE + zenodo_data_release 2>&1 | tee -a "$LOGFILE" ETL_SUCCESS=${PIPESTATUS[0]} fi fi # This way we also save the logs from latter steps in the script -gsutil cp $LOGFILE ${PUDL_GCS_OUTPUT} +gsutil cp "$LOGFILE" "$PUDL_GCS_OUTPUT" # Notify slack about entire pipeline's success or failure; # PIPESTATUS[0] either refers to the failed ETL run or the last distribution From 2e6097c5341fec99721fffff8af7b70745bc73a9 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Wed, 20 Dec 2023 13:47:47 -0600 Subject: [PATCH 32/33] remove old ACTION_SHA add BUILD_ID to container envvars --- .github/workflows/build-deploy-pudl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index a4a242874f..e5196392cc 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -118,8 +118,8 @@ jobs: --container-arg="bash" \ --container-arg="./docker/gcp_pudl_etl.sh" \ --container-env-file="./docker/.env" \ - --container-env ACTION_SHA=$ACTION_SHA \ --container-env GITHUB_REF=${{ env.GITHUB_REF }} \ + --container-env BUILD_ID=${{ env.BUILD_ID }} \ --container-env NIGHTLY_TAG=${{ env.NIGHTLY_TAG }} \ --container-env GITHUB_ACTION_TRIGGER=${{ github.event_name }} \ --container-env SLACK_TOKEN=${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} \ From de588511f7a5a344922d77aefb901a776ec56d95 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Wed, 20 Dec 2023 14:49:36 -0600 Subject: [PATCH 33/33] Reinstate caching of conda environment in Docker image. 
--- docker/Dockerfile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2682b18a58..56de80bce8 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -35,14 +35,17 @@ RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} ${PUDL_REPO} # Copy dagster configuration file COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml -# Copy the cloned pudl repository into the container -# This includes the .git directory, so it is a whole repo -COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${PUDL_REPO} - +# Copy conda-lock.yml in so we can build the conda environment and cache that layer in +# the Docker image before installing PUDL. +COPY environments/conda-lock.yml ${PUDL_REPO}/environments/conda-lock.yml # Create a conda environment based on the specification in the repo RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file ${PUDL_REPO}/environments/conda-lock.yml && \ micromamba clean -afy +# Copy the rest of the cloned PUDL repo into the image. +# This includes the .git directory, so it is a whole repo +COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${PUDL_REPO} + # TODO(rousik): The following is a workaround for sudden breakage where conda # can't find libraries contained within the environment. It's unclear why! ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib