Skip to content

Data Drift Check

Data Drift Check #6

# Checks for drift between training and production data.
# Automatic retraining when drift is detected is available but disabled:
# see the commented-out `retrain` job at the end of this file.
name: Data Drift Check
on:
  # Trigger after another workflow completes
  workflow_run:
    workflows:
      - 'Production Data Batching'
    types: [completed]
  # Also run on every push to the ijdoc/issue5 branch
  push:
    branches:
      - ijdoc/issue5
jobs:
  # Installs the `drift` project's dependencies with pipenv, runs
  # check_drift.py and appends whatever it prints to the job summary.
  drift_check:
    # `workflow_run` fires when the upstream workflow completes, whatever its
    # conclusion (success, failure, cancelled, ...). Only check for drift when
    # the upstream run succeeded; push-triggered runs are not gated.
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Grant write access to issues.
      # NOTE(review): presumably check_drift.py uses GITHUB_TOKEN to open an
      # issue when drift is found -- confirm against the script.
      issues: write
    # Define outputs for downstream jobs.
    # NOTE(review): enabling this assumes check_drift.py writes
    # `drift_detected` to $GITHUB_OUTPUT -- confirm before uncommenting.
    # outputs:
    #   drift_detected: ${{ steps.drift_check.outputs.drift_detected }}
    steps:
      - name: ⏬ Checkout repository
        uses: actions/checkout@v4
      - name: 🐍 Setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1
          python-version: '3.10'
      - name: 📦 Install dependencies
        run: |
          pip install pipenv
          cd drift
          pipenv sync
      - name: ⚙️ Run Drift Check
        id: drift_check
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cd drift
          # Capture the script's stdout, then append it to the job summary.
          output=$(pipenv run python check_drift.py)
          echo "$output" >> "$GITHUB_STEP_SUMMARY"
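# Uncomment the following block to automatically retrain the model when
# drift is detected. The `outputs` block of the `drift_check` job must be
# uncommented as well, because the `if` condition below reads
# `needs.drift_check.outputs.drift_detected`.
# (Remove the leading "# " from each line to keep the nesting under `jobs:`.)
#   retrain:
#     needs: drift_check
#     if: ${{ needs.drift_check.outputs.drift_detected == 'True' }}
#     runs-on: self-hosted
#     steps:
#       - name: ⏬ Checkout repository
#         uses: actions/checkout@v4
#       - name: ⚙️ Train!
#         env:
#           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
#         run: |
#           python train.py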