# Validation with Standalone Kepler #409
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, and the token permissions
# granted to every job in this workflow.
name: Validation with Standalone Kepler

on:
  # Allow manual runs from the Actions tab / API.
  workflow_dispatch:
  # Daily at 16:00 (GitHub evaluates schedule cron expressions in UTC).
  schedule:
    - cron: '0 16 * * *'

# The jobs below commit and push files under docs/, so `contents: write`
# is required for the GITHUB_TOKEN.
permissions:
  pull-requests: write
  contents: write
  repository-projects: write
  packages: write
jobs:
  # Calls the reusable create_equinix_runner.yml workflow, forwarding all
  # of the caller's secrets to it.
  Create-runner:
    name: "Create Runner"
    uses: ./.github/workflows/create_equinix_runner.yml
    secrets: inherit

  # Runs on the self-hosted runner once Create-runner has finished:
  # drives the Ansible playbooks, archives the validation results under
  # docs/validation/<date>/, and pushes them back to the repository.
  Install:
    name: "Install"
    needs: Create-runner
    runs-on: self-hosted
    outputs:
      # Exposed so the Cleanup job can pass the runner's name to the
      # cleanup workflow.
      runner-name: ${{ runner.name }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run Setup Runner Action
        uses: ./.github/actions/setup-action

      # Records the RAPL domain names found under /sys/class/powercap so
      # they can be archived next to the validation report later on.
      - name: List available RAPL domains
        run: |
          for file in $(sudo find -L /sys/class/powercap/intel-rapl -name name 2>/dev/null); do cat $file; done | sort -n| uniq | tee -a /tmp/rapl-domain-availability.txt
          # expected typical output if all domains are supported
          # - core
          # - dram
          # - package-0
          # - psys # relatively new power management domain, only available after Skylake
          # - uncore

      # NOTE(review): second checkout of the same repository; presumably
      # restores the workspace after the setup action - confirm whether
      # it is still needed.
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run playbook
        # The id is required so later steps can read the `date` output
        # that this script appends to $GITHUB_OUTPUT on its last line.
        id: run_playbook
        run: |
          cd ${GITHUB_WORKSPACE}/ansible
          echo "Create VM"
          ansible-playbook -i inventory.yml kvm_playbook.yml
          echo "Install SSH tunnel"
          ansible-playbook ssh_tunnel_playbook.yml
          echo "Install Prometheus"
          ansible-playbook -i inventory.yml metrics_playbook.yml
          echo "Install Node Exporter"
          ansible-playbook -i inventory.yml node_exporter_playbook.yml -vvv
          echo "Verify node-exporter"
          sudo systemctl status node_exporter || true
          sudo ss -tuln | grep 9100 || true
          curl -s localhost:9100/metrics | grep collector || true
          echo "Install Kepler"
          ansible-playbook -i inventory.yml kepler_playbook.yml
          echo "Create ssh tunnel"
          ansible-playbook -i inventory.yml ssh_tunnel_playbook.yml
          echo "Run validation test"
          ansible-playbook -vvv kepler_validator.yml
          echo "Checkout the report"
          ls /tmp
          cat /tmp/report-*.md || true
          # create a directory to store the artifacts, the directory is the current date
          set -x
          export DATE_STR=$(date +%Y-%m-%d)
          export TIME_STR=$(date +%H-%M-%S)
          cd ${GITHUB_WORKSPACE}
          mkdir -p docs/validation/${DATE_STR}
          export KEPLER_TAG=$(ls -d /tmp/validator-* |tail -1 | sed 's/.*validator-//g')
          # python -m pip install --upgrade pip
          # pip install tabulate
          # python util/generate_daily_validations.py \
          # --report-md-filepath docs/daily-validations/daily-validations-kepler-standalone/daily-report.md \
          # --report-json-filepath docs/daily-validations/daily-validations-kepler-standalone/daily-report.json \
          # --new-val-filepath /tmp/validator-${KEPLER_TAG}/${KEPLER_TAG}.json
          # copy the report to the directory
          mv /tmp/validator-${KEPLER_TAG}/ docs/validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/
          mv /tmp/rapl-domain-availability.txt docs/validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/
          echo "| " ${DATE_STR} " | " ${KEPLER_TAG}-${TIME_STR} " | [Report](validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/report-${KEPLER_TAG}.md) | [Artifacts](validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/artifacts) |" \
          >> docs/kepler-model-validation.md
          # Capture Prometheus TSDB Snapshot
          mkdir -p docs/validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/prometheus-snapshot
          snap_name=$(curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot | jq -r '.data.name')
          tar -cvzf docs/validation/${DATE_STR}/validator-${KEPLER_TAG}-${TIME_STR}/prometheus-snapshot/snapshot.tar.gz -C \
          /var/lib/prometheus/snapshots ${snap_name}
          # Push to the repo
          git config user.email "dependabot[bot]@users.noreply.github.com"
          git config user.name "dependabot[bot]"
          git add docs/*
          git commit -m "Add validation report for ${DATE_STR}-${TIME_STR}" -s
          git push
          echo "date=${DATE_STR}" >> $GITHUB_OUTPUT
        env:
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
          TOTAL_RUNTIME_SECONDS: 1200
          # NOTE(review): "VALIDATOTR" looks like a misspelling of
          # "VALIDATOR"; left unchanged because the consumer of this
          # variable is not visible here - confirm before renaming.
          VALIDATOTR_CURVE_TYPE: "default"

      - name: upload to artifacts
        uses: actions/upload-artifact@v4
        with:
          name: equinix_metal
          # Uploads only the directory created by the run_playbook step,
          # using the `date` output that step wrote to $GITHUB_OUTPUT.
          path: docs/validation/${{ steps.run_playbook.outputs.date }}
          retention-days: 30

      # Regenerates the analytics chart from docs/analytics and pushes
      # the result in a separate commit.
      - name: Update model validation chart
        run: |
          export DATE_STR=$(date +%Y-%m-%d)
          export TIME_STR=$(date +%H-%M-%S)
          cd ${GITHUB_WORKSPACE}
          cd docs/analytics
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          python ./kepler_analytics.py
          # update git
          git config user.email "dependabot[bot]@users.noreply.github.com"
          git config user.name "dependabot[bot]"
          cd ${GITHUB_WORKSPACE}
          git add docs/analytics/*
          git add docs/kepler-model-validation-chart.md
          git commit -m "Add validation chart for ${DATE_STR}-${TIME_STR}" -s
          git push
        env:
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

  # Always runs after Install (even when Install fails or is cancelled)
  # and hands the runner's name to the reusable cleanup workflow.
  Cleanup:
    name: "Cleanup"
    needs: [Install]
    if: ${{ always() }}
    uses: ./.github/workflows/clean_equinix_runner.yml
    secrets: inherit
    with:
      runner_name: ${{ needs.Install.outputs.runner-name }}