Skip to content

Commit

Permalink
chore: first debug test
Browse files (browse the repository at this point in the history)
  • Loading branch information
ijdoc committed Oct 9, 2024
1 parent 3b58f42 commit 7f23275
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 468 deletions.
15 changes: 2 additions & 13 deletions .github/workflows/eval_retrain.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,19 @@ on:

jobs:
eval_check:
runs-on: ubuntu-latest
runs-on: self-hosted
outputs: # Define outputs for downstream jobs
drift_detected: ${{ steps.eval_check.outputs.degraded }}
steps:
- name: ⏬ Checkout repository
uses: actions/checkout@v4

- name: 🐍 Setup python
uses: actions/setup-python@v5
with:
python-version: '3.10'

- name: 📦 Install dependencies
run: |
pip install pipenv
cd eval
pipenv sync
- name: ⚙️ Run Evaluation
env:
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
id: eval_check
run: |
cd drift
cd eval
output=$(pipenv run python eval.py)
echo "$output" >> $GITHUB_STEP_SUMMARY
Expand Down
17 changes: 9 additions & 8 deletions drift/check_drift.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
) as run:

# Grab the latest training and production dataframes
train_artifact = run.use_artifact("jdoc-org/wandb-registry-dataset/training:latest")
run.config["train_data"] = train_artifact.source_name
registered_training_dataset = "jdoc-org/wandb-registry-dataset/training:latest"
train_artifact = run.use_artifact(registered_training_dataset)
run.config["train_data"] = train_artifact.name
train_data = train_artifact.get("training_data").get_dataframe()

prod_artifact = run.use_artifact("production_data:latest")
run.config["prod_data"] = prod_artifact.source_name
run.config["prod_data"] = prod_artifact.name
prod_data = prod_artifact.get("production_data").get_dataframe()

feature_list = ["active_power", "temp", "humidity", "pressure"]
Expand Down Expand Up @@ -72,19 +73,19 @@
artifact.description = prod_artifact.description
artifact = run.log_artifact(artifact).wait()
# Open a github issue asking for manual review
issue_title = f"Data drift detected on {train_artifact.source_name}"
issue_title = f"Data drift detected on {train_artifact.name}"
issue_body = (
f"Data drift has been detected when comparing the registered training dataset with recent production data.\n\n"
f"Please review the [candidate artifact](https://wandb.ai/{run.entity}/{run.project}/artifacts/{artifact.type}/{artifact.source_name}) "
f"Please review the [candidate artifact](https://wandb.ai/{run.entity}/{run.project}/artifacts/{artifact.type}/{artifact.name}) "
f"and the [drift report]({report_url}) to determine if the registered training data should be updated.\n\n"
f"To approve the new candidate after review, link it to [the training Dataset Registry](https://wandb.ai/registry/dataset?selectionPath=jdoc-org%2Fwandb-registry-dataset%2Ftraining&view=versions) at "
f"(`jdoc-org/wandb-registry-dataset/training`), otherwise close this issue."
f"(`{registered_training_dataset}`), otherwise close this issue."
)
issue_url = open_github_issue(issue_title, issue_body, labels=["drift", "data"])
print(
f"Production batch `{prod_artifact.source_name}` has been logged "
f"as candidate to replace training data `{artifact.source_name}`. "
f"An [issue]({issue_url}) was created for manual review:\n"
f"as candidate `{artifact.name}` to replace training data. "
f"An [issue]({issue_url}) was also created for manual review:\n"
)
print(f"- [Data Drift Issue]({issue_url})")
else:
Expand Down
12 changes: 0 additions & 12 deletions eval/Pipfile

This file was deleted.

Loading

0 comments on commit 7f23275

Please sign in to comment.