Determine workflow run id more robustly to avoid race conditions (#37)

* Set query=event=workflow_dispatch&status=in_progress. Align wait_interval with api and docs
* Remove note about ###Timing in the ##Potential Issues section and update 'INPUT_WAIT_INTERVAL' in the ##Testing section of the README.
* Added the ability to delay API calls for the last_workflow loop through a configurable last_workflow_interval input variable (defaults to 0). Redefine 'inputs' variable to be 'client_payload'. Lastly, using #bash, which uses /usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin
* Determine workflow run id more robustly to avoid race conditions
* Refactor to use a common api function that catches and reports errors
* Change function name to be busybox-compatible
* Fix testing instructions to use busybox to run entrypoint.sh

  The Alpine base image uses the busybox implementation of sh, so we need to use the same shell when testing entrypoint.sh outside of Docker.
* Fix incorrect status message
* Add coreutils to Docker image
* Remove extraneous single quotes
* Use an explicit tag for the alpine base image

  This prevents runners from using a stale base and is better practice anyway

Co-authored-by: Levon <[email protected]>
Co-authored-by: Keith Weaver <[email protected]>
convictional · Mar 9, 2022 · 8be90d7 · 8be90d7 · kschlesselmann · May 19, 2022
1 parent d2b75f3
commit 8be90d7
Show file tree

Hide file tree

Showing 4 changed files with 120 additions and 85 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,8 +1,7 @@
-# https://hub.docker.com/_/alpine
 FROM alpine:3.15.0
 
-RUN apk update && \
-    apk --no-cache add curl jq
+RUN apk update
+RUN apk --no-cache add curl jq coreutils
 
 COPY entrypoint.sh /entrypoint.sh
 

diff --git a/README.md b/README.md
@@ -9,19 +9,20 @@ When deploying an app you may need to deploy additional services, this Github Ac
 
 ## Arguments
 
-| Argument Name         | Required   | Default     | Description           |
-| --------------------- | ---------- | ----------- | --------------------- |
-| `owner`               | True       | N/A         | The owner of the repository where the workflow is contained. |
-| `repo`                | True       | N/A         | The repository where the workflow is contained. |
-| `github_token`        | True       | N/A         | The Github access token with access to the repository. Its recommended you put it under secrets. |
-| `workflow_file_name`  | True       | N/A         | The reference point. For example, you could use main.yml. |
-| `github_user`         | False      | N/A         | The name of the github user whose access token is being used to trigger the workflow. |
-| `ref`                 | False      | main        | The reference of the workflow run. The reference can be a branch, tag, or a commit SHA. |
-| `waiting_interval`    | False      | 10          | The number of seconds delay between checking for result of run. |
-| `inputs`              | False      | `{}`        | Inputs to pass to the workflow, must be a JSON string |
-| `propagate_failure`   | False      | `true`      | Fail current job if downstream job fails. |
-| `trigger_workflow`    | False      | `true`      | Trigger the specified workflow. |
-| `wait_workflow`       | False      | `true`      | Wait for workflow to finish. |
+| Argument Name            | Required   | Default     | Description           |
+| ---------------------    | ---------- | ----------- | --------------------- |
+| `owner`                  | True       | N/A         | The owner of the repository where the workflow is contained. |
+| `repo`                   | True       | N/A         | The repository where the workflow is contained. |
+| `github_token`           | True       | N/A         | The GitHub access token with access to the repository. It's recommended you put it under secrets. |
+| `workflow_file_name`     | True       | N/A         | The reference point. For example, you could use main.yml. |
+| `github_user`            | False      | N/A         | The name of the github user whose access token is being used to trigger the workflow. |
+| `ref`                    | False      | main        | The reference of the workflow run. The reference can be a branch, tag, or a commit SHA. |
+| `wait_interval`          | False      | 10          | The number of seconds to wait between checks for the result of the run. |
+| `client_payload`         | False      | `{}`        | Payload to pass to the workflow, must be a JSON string |
+| `propagate_failure`      | False      | `true`      | Fail current job if downstream job fails. |
+| `trigger_workflow`       | False      | `true`      | Trigger the specified workflow. |
+| `wait_workflow`          | False      | `true`      | Wait for workflow to finish. |
+| `last_workflow_interval` | False      | 0           | The number of seconds delay between checking for the last workflow. default: 0 |
 
 
 ## Example
@@ -48,10 +49,11 @@ When deploying an app you may need to deploy additional services, this Github Ac
     workflow_file_name: main.yml
     ref: release-branch
     wait_interval: 10
-    inputs: '{}'
+    client_payload: '{}'
     propagate_failure: false
     trigger_workflow: true
     wait_workflow: true
+    last_workflow_interval: 1
 ```
 
 
@@ -60,17 +62,18 @@ When deploying an app you may need to deploy additional services, this Github Ac
 You can test out the action locally by cloning the repository to your computer. You can run:
 
 ```shell
-INPUT_WAITING_INTERVAL=10 \
-  INPUT_PROPAGATE_FAILURE=false \
-  INPUT_TRIGGER_WORKFLOW=true \
-  INPUT_WORKFLOW_FILE_NAME="main.yml" \
-  INPUT_GITHUB_USER="github-user" \
-  INPUT_WAIT_WORKFLOW=true \
-  INPUT_OWNER="keithconvictional" \
-  INPUT_REPO="trigger-workflow-and-wait-example-repo1" \
-  INPUT_GITHUB_TOKEN="<REDACTED>" \
-  INPUT_INPUTS='{}' \
-  bash entrypoint.sh
+INPUT_WAIT_INTERVAL=10 \
+INPUT_PROPAGATE_FAILURE=false \
+INPUT_TRIGGER_WORKFLOW=true \
+INPUT_WORKFLOW_FILE_NAME="main.yml" \
+INPUT_GITHUB_USER="github-user" \
+INPUT_WAIT_WORKFLOW=true \
+INPUT_LAST_WORKFLOW_INTERVAL=1 \
+INPUT_OWNER="keithconvictional" \
+INPUT_REPO="trigger-workflow-and-wait-example-repo1" \
+INPUT_GITHUB_TOKEN="<REDACTED>" \
+INPUT_CLIENT_PAYLOAD='{}' \
+busybox sh entrypoint.sh
 ```
 
 You will need to create a GitHub personal access token. You can then create a test workflow to be executed: in a repository, add a new `main.yml` to `.github/workflows/`. The workflow will be:
@@ -102,10 +105,6 @@ You can see the example [here](https://github.com/keithconvictional/trigger-work
 
 ## Potential Issues
 
-### Timing
-
-The actions dispatch is an asynchronous job and it at times can take a few seconds to start. If you do not have a delay, it may be started after the action has checked if it was successful. ie. Start dispatch call --> No delay --> Check if successful --> Actually starts. If the workflow has run before, it will just complete immediately as a successful run. You can solve this by simply increasing the delay to a few seconds. By default it is 10 seconds. Creating a large delay between checks will help the traffic to the Github API.
-
 ### Changes
 
 If you do not want the latest build all of the time, please use a versioned copy of the Github Action. You specify the version after the `@` sign.

diff --git a/action.yml b/action.yml
@@ -26,8 +26,8 @@ inputs:
   workflow_file_name:
     description: "The reference point. For example, you could use main.yml."
     required: true
-  inputs:
-    description: 'Inputs to pass to the workflow, must be a JSON string'
+  client_payload:
+    description: 'Payload to pass to the workflow, must be a JSON string'
     required: false
   propagate_failure:
     description: 'Fail current job if downstream job fails. default: true'
@@ -38,6 +38,9 @@ inputs:
   wait_workflow:
     description: 'Wait for workflow to finish. default: true'
     required: false
+  last_workflow_interval:
+    description: 'The number of seconds delay between checking for the last workflow. default: 0'
+    required: false
 outputs:
   workflow_id:
     description: The ID of the workflow that was triggered by this action

diff --git a/entrypoint.sh b/entrypoint.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 set -e
 
 usage_docs() {
@@ -16,9 +16,9 @@ GITHUB_SERVER_URL="${SERVER_URL:-https://github.com}"
 
 validate_args() {
   wait_interval=10 # Waits for 10 seconds
-  if [ "${INPUT_WAITING_INTERVAL}" ]
+  if [ "${INPUT_WAIT_INTERVAL}" ]
   then
-    wait_interval=${INPUT_WAITING_INTERVAL}
+    wait_interval=${INPUT_WAIT_INTERVAL}
   fi
 
   propagate_failure=true
@@ -39,6 +39,12 @@ validate_args() {
     wait_workflow=${INPUT_WAIT_WORKFLOW}
   fi
 
+  last_workflow_interval=0
+  if [ -n "${INPUT_LAST_WORKFLOW_INTERVAL}" ]
+  then
+    last_workflow_interval=${INPUT_LAST_WORKFLOW_INTERVAL}
+  fi
+
   if [ -z "${INPUT_OWNER}" ]
   then
     echo "Error: Owner is a required argument."
@@ -69,10 +75,10 @@ validate_args() {
     exit 1
   fi
 
-  inputs=$(echo '{}' | jq -c)
-  if [ "${INPUT_INPUTS}" ]
+  client_payload=$(echo '{}' | jq)
+  if [ "${INPUT_CLIENT_PAYLOAD}" ]
   then
-    inputs=$(echo "${INPUT_INPUTS}" | jq -c)
+    client_payload=$(echo "${INPUT_CLIENT_PAYLOAD}" | jq)
   fi
 
   ref="main"
@@ -82,71 +88,96 @@ validate_args() {
   fi
 }
 
-lets_wait() {
-  echo "Sleeping for ${wait_interval} seconds"
-  sleep $wait_interval
-}
-
-trigger_workflow() {
-  echo "${GITHUB_API_URL}/repos/${INPUT_OWNER}/${INPUT_REPO}/actions/workflows/${INPUT_WORKFLOW_FILE_NAME}/dispatches"
-
-  curl --fail -X POST "${GITHUB_API_URL}/repos/${INPUT_OWNER}/${INPUT_REPO}/actions/workflows/${INPUT_WORKFLOW_FILE_NAME}/dispatches" \
-    -H "Accept: application/vnd.github.v3+json" \
-    -H "Content-Type: application/json" \
-    -H "Authorization: Bearer ${INPUT_GITHUB_TOKEN}" \
-    --data "{\"ref\":\"${ref}\",\"inputs\":${inputs}}"
-  if [[ "$?" -gt 0 ]]; then
-    echo "Failed to call workflow_dispatch. Exiting."
+api() {
+  path=$1; shift
+  if response=$(curl --fail-with-body -sSL \
+      "${GITHUB_API_URL}/repos/${INPUT_OWNER}/${INPUT_REPO}/actions/$path" \
+      -H "Authorization: Bearer ${INPUT_GITHUB_TOKEN}" \
+      -H 'Accept: application/vnd.github.v3+json' \
+      -H 'Content-Type: application/json' \
+      "$@")
+  then
+    echo "$response"
+  else
+    echo >&2 "api failed:"
+    echo >&2 "path: $path"
+    echo >&2 "response: $response"
     exit 1
   fi
-  lets_wait
 }
 
-wait_for_workflow_to_finish() {
-  # Find the id of the last run using filters to identify the workflow triggered by this action
-  echo "Getting the ID of the workflow..."
-  query="event=workflow_dispatch&status=queued"
-  if [ "$INPUT_GITHUB_USER" ]
-  then
-    query="${query}&actor=${INPUT_GITHUB_USER}"
-  fi
-  last_workflow="null"
-  while [[ "$last_workflow" == "null" ]]
+# Return the ids of the most recent workflow runs, optionally filtered by user
+get_workflow_runs() {
+  since=${1:?}
+
+  query="event=workflow_dispatch&created=>=$since${INPUT_GITHUB_USER+&actor=}${INPUT_GITHUB_USER}&per_page=100"
+
+  echo "Getting workflow runs using query: ${query}" >&2
+
+  api "workflows/${INPUT_WORKFLOW_FILE_NAME}/runs?${query}" |
+  jq '.workflow_runs[].id' |
+  sort # Sort to ensure repeatable order, and lexicographically for compatibility with join
+}
+
+trigger_workflow() {
+  START_TIME=$(date +%s)
+  SINCE=$(date -u -Iseconds -d "@$((START_TIME - 120))") # Two minutes ago, to overcome clock skew
+
+  OLD_RUNS=$(get_workflow_runs "$SINCE")
+
+  echo >&2 "Triggering workflow:"
+  echo >&2 "  workflows/${INPUT_WORKFLOW_FILE_NAME}/dispatches"
+  echo >&2 "  {\"ref\":\"${ref}\",\"inputs\":${client_payload}}"
+
+  api "workflows/${INPUT_WORKFLOW_FILE_NAME}/dispatches" \
+    --data "{\"ref\":\"${ref}\",\"inputs\":${client_payload}}"
+
+  NEW_RUNS=$OLD_RUNS
+  while [ "$NEW_RUNS" = "$OLD_RUNS" ]
   do
-    echo "Using the following params to filter the workflow runs to get the triggered run id -"
-    echo "Query params: ${query}"
-    last_workflow=$(curl -X GET "${GITHUB_API_URL}/repos/${INPUT_OWNER}/${INPUT_REPO}/actions/workflows/${INPUT_WORKFLOW_FILE_NAME}/runs?${query}" \
-      -H 'Accept: application/vnd.github.antiope-preview+json' \
-      -H "Authorization: Bearer ${INPUT_GITHUB_TOKEN}" | jq '[.workflow_runs[]] | first')
+    echo >&2 "Sleeping for ${wait_interval} seconds"
+    sleep "$wait_interval"
+    NEW_RUNS=$(get_workflow_runs "$SINCE")
   done
-  last_workflow_id=$(echo "${last_workflow}" | jq '.id')
+
+  # Return new run ids
+  join -v2 <(echo "$OLD_RUNS") <(echo "$NEW_RUNS")
+}
+
+wait_for_workflow_to_finish() {
+  last_workflow_id=${1:?}
   last_workflow_url="${GITHUB_SERVER_URL}/${INPUT_OWNER}/${INPUT_REPO}/actions/runs/${last_workflow_id}"
+
+  echo "Waiting for workflow to finish:"
   echo "The workflow id is [${last_workflow_id}]."
   echo "The workflow logs can be found at ${last_workflow_url}"
   echo "::set-output name=workflow_id::${last_workflow_id}"
   echo "::set-output name=workflow_url::${last_workflow_url}"
   echo ""
-  conclusion=$(echo "${last_workflow}" | jq '.conclusion')
-  status=$(echo "${last_workflow}" | jq '.status')
 
-  while [[ "${conclusion}" == "null" && "${status}" != "\"completed\"" ]]
+  conclusion=null
+  status=
+
+  while [[ "${conclusion}" == "null" && "${status}" != "completed" ]]
   do
-    lets_wait
-    workflow=$(curl -X GET "${GITHUB_API_URL}/repos/${INPUT_OWNER}/${INPUT_REPO}/actions/workflows/${INPUT_WORKFLOW_FILE_NAME}/runs" \
-      -H 'Accept: application/vnd.github.antiope-preview+json' \
-      -H "Authorization: Bearer ${INPUT_GITHUB_TOKEN}" | jq '.workflow_runs[] | select(.id == '${last_workflow_id}')')
-    conclusion=$(echo "${workflow}" | jq '.conclusion')
-    status=$(echo "${workflow}" | jq '.status')
+    echo "Sleeping for \"${wait_interval}\" seconds"
+    sleep "${wait_interval}"
+
+    workflow=$(api "runs/$last_workflow_id")
+    conclusion=$(echo "${workflow}" | jq -r '.conclusion')
+    status=$(echo "${workflow}" | jq -r '.status')
+
     echo "Checking conclusion [${conclusion}]"
     echo "Checking status [${status}]"
   done
 
-  if [[ "${conclusion}" == "\"success\"" && "${status}" == "\"completed\"" ]]
+  if [[ "${conclusion}" == "success" && "${status}" == "completed" ]]
   then
     echo "Yes, success"
   else
     # Alternative "failure"
-    echo "Conclusion is not success, its [${conclusion}]."
+    echo "Conclusion is not success, it's [${conclusion}]."
+
     if [ "${propagate_failure}" = true ]
     then
       echo "Propagating failure to upstream job"
@@ -160,14 +191,17 @@ main() {
 
   if [ "${trigger_workflow}" = true ]
   then
-    trigger_workflow
+    run_ids=$(trigger_workflow)
   else
     echo "Skipping triggering the workflow."
   fi
 
   if [ "${wait_workflow}" = true ]
   then
-    wait_for_workflow_to_finish
+    for run_id in $run_ids
+    do
+      wait_for_workflow_to_finish "$run_id"
+    done
   else
     echo "Skipping waiting for workflow."
   fi