validate self-hosted runner status #32
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# NOTE: ipatch, self-hosted runners are removed from the runner registry if they don't connect to github within 14 days
# 1. check if the remote machine hosting the virtual machine running the self-hosted runner is online
# 2. if the remote box is online, check to see if the vm service, i.e. vmmojave, is up and running.
#    a. status the systemd service for the vm
#    b. use `nc` to verify the online status of the vm using its static ip (ping is NOT an option)
#    c. if the vm for the self-hosted runner is online, verify the online status of the self-hosted runner service/process
# 3. if one of our checks fails we need to send an email followed by an action / step to try
#    and bring the service online, and then move to the next step.
# 4. set up a specific group on the remote box and virtual machine allowing an isolated $USER to run specific tasks for statusing and controlling runner related services.
# the way of thinking about this is to,
# 1. first check if the runner is online, by querying the github api endpoint
#    a1. if getting a successful online response from the api, take note
#    b1. if the runner is offline, run through the below steps/checks
name: validate self-hosted runner status

on:
  # NOTE: run this action on request from github web UI
  workflow_dispatch:
  schedule:
    # field order: minute hour day-of-month month day-of-week (see crontab.guru)
    # `*/12` in the day-of-month field fires on days 1, 13 and 25 of every
    # month, so two consecutive runs are never more than 12 days apart. That
    # keeps the check inside the 14 day window after which github removes
    # self-hosted runners that have not connected.
    - cron: '1 4 */12 * *'

jobs:
  validate-self-hosted-runners:
    # a fast booting github-hosted runner
    runs-on: ubuntu-latest
    env:
      # One variable per runner, holding the static IP of its virtual machine.
      # The variable NAME doubles as the runner name queried from the github
      # api and as the systemd unit restarted on the host, so the shell loops
      # below resolve the IP with bash indirect expansion (${!runner}).
      vmmojave: '192.168.1.114'
      vmcatalina: '192.168.1.115'
      # NOTE(review): corrected from 191.168.1.116 on the assumption that this
      # vm shares the 192.168.1.x network with the other two -- confirm.
      vmbigsur: '192.168.1.116'
      # NOTE(review): the value below is an obfuscation placeholder, not a
      # deliverable address; replace it with the real maintainer address.
      # It is quoted because a plain scalar starting with `[` is parsed by
      # YAML as a flow sequence instead of a string.
      maintainer_email: '[email protected]'
    steps:
      # Writes the private key and an ssh host alias (`shrunserver`) so later
      # steps can simply run `ssh shrunserver <command>`.
      - name: Configure SSH for github hosted runner
        id: configure_ssh
        run: |
          mkdir -p ~/.ssh/
          echo "$SSH_KEY" > ~/.ssh/shrunserver.key
          chmod 600 ~/.ssh/shrunserver.key
          # NOTE(review): `StrictHostKeyChecking no` disables host key
          # verification; consider pinning the host key in known_hosts.
          cat >>~/.ssh/config <<END
          Host shrunserver
            HostName $SSH_HOST
            User $SSH_USER
            IdentityFile ~/.ssh/shrunserver.key
            StrictHostKeyChecking no
          END
        env:
          SSH_USER: ${{ secrets.ARCHBOX_SERVER_USER }}
          SSH_KEY: ${{ secrets.ACTIONS_SSH_PRIVATE_KEY }}
          SSH_HOST: ${{ secrets.ARCHBOX_SERVER_IP }}

      - name: Debug runner IPs
        id: debug_runner_ips
        run: |
          echo "vmmojave: $vmmojave"
          echo "vmcatalina: $vmcatalina"
          echo "vmbigsur: $vmbigsur"

      # Debug helper: prints each runner's IP and the status github reports.
      - name: test_runner_status
        id: test_runner_status
        env:
          # passed through the environment instead of being spliced into the
          # script text by the expression engine
          GH_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
        run: |
          for runner in "vmmojave" "vmcatalina" "vmbigsur"; do
            # Access the IP address using indirect expansion
            runner_ip="${!runner}"
            echo "$runner_ip"
            # The parentheses make `//` apply to the whole pipeline, so a
            # runner missing from the registry yields "not found" rather
            # than an empty string.
            status_selfhosted_runner=$(curl -s -H "Authorization: Bearer $GH_API_TOKEN" \
              https://api.github.com/repos/freecad/homebrew-freecad/actions/runners \
              | jq -r --arg name "$runner" \
                '(.runners[] | select(.name == $name) | .status) // "not found"')
            echo "$runner is $status_selfhosted_runner"
          done

      # NOTE: HOMEBREW_GITHUB_API_TOKEN needed in repo secrets, use web UI to add token
      #
      # For every runner that github does not report as "online":
      #   1. probe port 22 of the machine hosting the vms
      #   2. from that machine, probe port 22 of the runner vm
      #   3. if the vm does not answer, restart its systemd unit, wait 45s
      #      and probe again
      # The outcome is exported as <runner>_status through $GITHUB_ENV so the
      # following steps can read it.
      - name: check self-hosted runners status for freecad/homebrew-freecad
        id: status_runners
        env:
          GH_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
          SSH_HOST: ${{ secrets.ARCHBOX_SERVER_IP }}
        run: |
          for runner in "vmmojave" "vmcatalina" "vmbigsur"; do
            # Access the IP address using indirect expansion
            runner_ip="${!runner}"
            status_selfhosted_runner=$(curl -s -H "Authorization: Bearer $GH_API_TOKEN" \
              https://api.github.com/repos/freecad/homebrew-freecad/actions/runners \
              | jq -r --arg name "$runner" \
                '(.runners[] | select(.name == $name) | .status) // "not found"')
            echo "$status_selfhosted_runner"
            if [[ "$status_selfhosted_runner" == "online" ]]; then
              echo "$runner is online ✅"
              echo "${runner}_status=online" >> "$GITHUB_ENV"
            else
              echo "the github runner service for $runner is OFFLINE 🚫"
              # Check reachability of host machine
              if nc -v -z -w 5 "$SSH_HOST" 22; then
                echo "Machine hosting the vm services for $runner is online ✅"
                # NOTE: ipatch, run command on remote computer via ssh from github hosted runner
                ssh shrunserver "date"
                # ssh shrunserver "/usr/bin/sudo ls" # DEBUG
                # echo "DEBUG $runner_ip" # DEBUG
                if ssh shrunserver "nc -v -z -w 5 $runner_ip 22 < /dev/null"; then
                  echo "virtual machine $runner appears to be online"
                else
                  echo "could not reach the virtual machine $runner"
                  # NOTE: status the vm systemd service, if offline attempt to start the service
                  # ssh shrunserver "echo \"/usr/bin/sudo systemctl restart $runner\";" # DEBUG
                  ssh shrunserver "/usr/bin/sudo systemctl restart $runner" || echo "failed to restart service"
                  # give the vm time to boot before probing it again
                  sleep 45
                  if ssh shrunserver "nc -v -z -w 5 $runner_ip 22 < /dev/null"; then
                    echo "$runner appears to have come back online ✅"
                  else
                    echo "$runner is still NOT online 🚫"
                    echo "${runner}_status=down" >> "$GITHUB_ENV"
                  fi
                fi
              else
                echo "can not connect to host machine running $runner virtual machine"
                # an unreachable host means the vm cannot be reached either,
                # so flag the runner to trigger the alert email
                echo "${runner}_status=down" >> "$GITHUB_ENV"
              fi
            fi
          done

      # The *_status values were exported through $GITHUB_ENV by the previous
      # step, so they are ordinary environment variables here; a runner that
      # got no status prints an empty value.
      - name: Print vm_status variable
        id: print_vm_status
        run: |
          echo "vmmojave status is ${vmmojave_status:-}"
          echo "vmcatalina status is ${vmcatalina_status:-}"
          echo "vmbigsur status is ${vmbigsur_status:-}"

      # Sent only when at least one runner was flagged as down.
      - name: Send email on failure to reach runner service
        id: send_alert_email
        if: >-
          env.vmmojave_status == 'down' ||
          env.vmcatalina_status == 'down' ||
          env.vmbigsur_status == 'down'
        uses: dawidd6/action-send-mail@v2
        with:
          server_address: smtp.gmail.com
          server_port: 587
          username: ${{ secrets.SMTP_USERNAME }}
          password: ${{ secrets.SMTP_PASSWORD }}
          subject: 'homebrew-freecad self-hosted runner is offline 🚫'
          from: ${{ secrets.SMTP_USERNAME }}
          to: ${{ env.maintainer_email }}
          body: >
            One of the components related to the homebrew-freecad self-hosted runners is offline,
            and can not be reached. Please follow up before runner expires from github registry.