Skip to content

Commit

Permalink
Finalise stages and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
alexioannides committed Jul 20, 2021
1 parent 46b5999 commit a6192e9
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 51 deletions.
2 changes: 1 addition & 1 deletion notebooks/requirements_nb.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
jupyterlab==3.0.16
seaborn==0.11.1
numpy==1.21.0
pandas==1.2.5
pandas==1.3.0
scikit-learn==0.24.2
boto3==1.17.101
joblib==1.0.1
2 changes: 1 addition & 1 deletion notebooks/time_to_dispatch_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@
"def preprocess(df: pd.DataFrame) -> np.ndarray:\n",
" df_processed = df.copy()\n",
" category_map = {\"SKU001\": 0, \"SKU002\": 1, \"SKU003\": 2, \"SKU004\": 3, \"SKU005\": 4}\n",
" df_processed[\"product_code\"] = df[\"product_code\"].apply(lambda e: category_map[e])\n",
" df_processed[\"product_code\"] = df[\"product_code\"].apply(lambda e: PRODUCT_CODE_MAP[e])\n",
" return df_processed.values\n",
"\n",
"preprocess(dataset)"
Expand Down
36 changes: 32 additions & 4 deletions pipeline/serve_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,33 @@
- Get model and load into memory.
- Start web API server.
"""
import sys
from enum import Enum
from typing import Dict, Union

import uvicorn
from bodywork_pipeline_utils import aws, logging
from fastapi import FastAPI, status
from pydantic import BaseModel
from numpy import array
from pydantic import BaseModel, Field

from pipeline.train_model import PRODUCT_CODE_MAP

app = FastAPI(debug=False)
log = logging.configure_logger()


class ProductCode(Enum):
SKU001 = "SKU001"
SKU002 = "SKU002"
SKU003 = "SKU003"
SKU004 = "SKU004"
SKU005 = "SKU005"


class Data(BaseModel):
product_code: str
orders_placed: float
product_code: ProductCode
orders_placed: float = Field(..., ge=0.0)


class Prediction(BaseModel):
Expand All @@ -27,8 +42,21 @@ class Prediction(BaseModel):
response_model=Prediction,
)
def time_to_dispatch(data: Data) -> Dict[str, Union[str, float]]:
return {"est_hours_to_dispatch": 1.0, "model_version": "0.0.1"}
features = array([[data.orders_placed, PRODUCT_CODE_MAP[data.product_code.value]]])
prediction = wrapped_model.model.predict(features).tolist()[0]
return {"est_hours_to_dispatch": prediction, "model_version": str(wrapped_model)}


if __name__ == "__main__":
try:
args = sys.argv
s3_bucket = args[1]
wrapped_model = aws.get_latest_pkl_model_from_s3(s3_bucket, "models")
log.info(f"Successfully loaded model: {wrapped_model}")
except IndexError:
log.error("Invalid arguments passed to serve_model.py - expected S3_BUCKET")
sys.exit(1)
except Exception as e:
log.error(f"Could not get latest model and start web server - {e}")
sys.exit(1)
uvicorn.run(app, host="0.0.0.0", workers=1)
17 changes: 8 additions & 9 deletions pipeline/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from typing import Any, Dict, List, NamedTuple, Tuple

from bodywork_pipeline_utils import aws, logging
from bodywork_pipeline_utils.aws.datasets import Dataset
from bodywork_pipeline_utils.aws import Dataset
from numpy import array, ndarray
from pandas import DataFrame
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.tree import DecisionTreeRegressor

CATEGORY_MAP = {"SKU001": 0, "SKU002": 1, "SKU003": 2, "SKU004": 3, "SKU005": 4}
PRODUCT_CODE_MAP = {"SKU001": 0, "SKU002": 1, "SKU003": 2, "SKU004": 3, "SKU005": 4}
HYPERPARAM_GRID = {
"random_state": [42],
"criterion": ["mse", "mae"],
Expand Down Expand Up @@ -115,8 +115,8 @@ def verify_trained_model_logic(model: BaseEstimator, data: FeatureAndLabels) ->
issues_detected: List[str] = []

orders_placed_sensitivity_checks = [
model.predict(array([[100, product], [110, product]])).tolist()
for product in range(len(CATEGORY_MAP))
model.predict(array([[100, product], [150, product]])).tolist()
for product in range(len(PRODUCT_CODE_MAP))
]
if not all(e[0] < e[1] for e in orders_placed_sensitivity_checks):
issues_detected.append(
Expand All @@ -140,9 +140,9 @@ def verify_trained_model_logic(model: BaseEstimator, data: FeatureAndLabels) ->

def preprocess(df: DataFrame) -> DataFrame:
"""Create features for training model."""
df_processed = df.copy()
df_processed["product_code"] = df["product_code"].apply(lambda e: CATEGORY_MAP[e])
return df_processed.values
processed = df.copy()
processed["product_code"] = df["product_code"].apply(lambda e: PRODUCT_CODE_MAP[e])
return processed.values


def persist_model(
Expand All @@ -152,7 +152,6 @@ def persist_model(
metadata = {
"r_squared": metrics.r_squared,
"mean_absolute_error": metrics.mean_absolute_error,
"category_map": CATEGORY_MAP
}
wrapped_model = aws.Model("time-to-dispatch", model, dataset, metadata)
s3_location = wrapped_model.put_model_to_s3(bucket, "models")
Expand All @@ -169,13 +168,13 @@ def persist_model(
r2_metric_warning_threshold = float(args[3])
if r2_metric_warning_threshold <= 0 or r2_metric_warning_threshold > 1:
raise ValueError()

except (ValueError, IndexError):
log.error(
"Invalid arguments passed to train_model.py. "
"Expected S3_BUCKET R_SQUARED_ERROR_THRESHOLD R_SQUARED_WARNING_THRESHOLD, "
"where all thresholds must be in the range [0, 1]."
)
sys.exit(1)

try:
main(
Expand Down
18 changes: 0 additions & 18 deletions pipeline/utils.py

This file was deleted.

11 changes: 5 additions & 6 deletions requirements_pipe.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
numpy>=1.21.0
pandas>=1.2.0
scikit-learn>=0.24.0
boto3>=1.17.0
joblib>=1.0.0
numpy==1.21.0
pandas==1.2.5
scikit-learn==0.24.2
boto3==1.17.101
fastapi==0.65.2
uvicorn==0.14.0
git+https://github.com/bodywork-ml/[email protected].1
git+https://github.com/bodywork-ml/[email protected].4
Binary file added tests/resources/model.pkl
Binary file not shown.
43 changes: 41 additions & 2 deletions tests/test_serve_model.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,66 @@
"""
Tests for web API.
"""
import pickle
from subprocess import run
from unittest.mock import patch

from bodywork_pipeline_utils.aws import Model
from fastapi.testclient import TestClient
from numpy import array

from pipeline.serve_model import app

test_client = TestClient(app)


def wrapped_model() -> Model:
with open("tests/resources/model.pkl", "r+b") as file:
wrapped_model = pickle.load(file)
return wrapped_model


@patch("pipeline.serve_model.wrapped_model", new=wrapped_model(), create=True)
def test_web_api_returns_valid_response_given_valid_data():
prediction_request = {"product_code": "SKU001", "orders_placed": 100}
prediction_response = test_client.post(
"/api/v0.1/time_to_dispatch", json=prediction_request
)
model_obj = wrapped_model()
expected_prediction = model_obj.model.predict(array([[100, 0]])).tolist()[0]
assert prediction_response.status_code == 200
assert "est_hours_to_dispatch" in prediction_response.json().keys()
assert "model_version" in prediction_response.json().keys()
assert prediction_response.json()["est_hours_to_dispatch"] == expected_prediction
assert prediction_response.json()["model_version"] == str(model_obj)


@patch("pipeline.serve_model.wrapped_model", new=wrapped_model(), create=True)
def test_web_api_returns_error_code_given_invalid_data():
prediction_request = {"product_code": "SKU001", "foo": 100}
prediction_response = test_client.post(
"/api/v0.1/time_to_dispatch", json=prediction_request
)
assert prediction_response.status_code == 422
assert "value_error.missing" in prediction_response.text

prediction_request = {"product_code": "SKU000", "orders_placed": 100}
prediction_response = test_client.post(
"/api/v0.1/time_to_dispatch", json=prediction_request
)
assert prediction_response.status_code == 422
assert "not a valid enumeration member" in prediction_response.text

prediction_request = {"product_code": "SKU001", "orders_placed": -100}
prediction_response = test_client.post(
"/api/v0.1/time_to_dispatch", json=prediction_request
)
assert prediction_response.status_code == 422
assert "ensure this value is greater than or equal to 0" in prediction_response.text


def test_web_server_raises_exception_if_passed_invalid_args():
process = run(
["python", "-m", "pipeline.serve_model"], capture_output=True, encoding="utf-8"
)
assert process.returncode != 0
assert "ERROR" in process.stdout
assert "Invalid arguments passed to serve_model.py" in process.stdout
25 changes: 15 additions & 10 deletions tests/test_train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,35 +171,40 @@ def test_run_job_handles_error_for_invalid_args():
)
assert process_one.returncode != 0
assert "ERROR" in process_one.stdout
assert "Invalid arguments passed to train_model.py" in process_one.stdout

process_two = run(
["python", "pipeline/train_model.py", "my-bucket", "-1", "0.5"],
["python", "-m", "pipeline.train_model", "my-bucket", "-1", "0.5"],
capture_output=True,
encoding="utf-8"
)
assert process_two.returncode != 0
assert "ERROR" in process_two.stdout
assert "Invalid arguments passed to train_model.py" in process_two.stdout

process_three = run(
["python", "pipeline/train_model.py", "my-bucket", "2", "0.5"],
["python", "-m", "pipeline.train_model", "my-bucket", "2", "0.5"],
capture_output=True,
encoding="utf-8"
)
assert process_three.returncode != 0
assert "ERROR" in process_three.stdout
assert "Invalid arguments passed to train_model.py" in process_three.stdout

process_two = run(
["python", "pipeline/train_model.py", "my-bucket", "0.5", "-1"],
process_four = run(
["python", "-m", "pipeline.train_model", "my-bucket", "0.5", "-1"],
capture_output=True,
encoding="utf-8"
)
assert process_two.returncode != 0
assert "ERROR" in process_two.stdout
assert process_four.returncode != 0
assert "ERROR" in process_four.stdout
assert "Invalid arguments passed to train_model.py" in process_four.stdout

process_three = run(
["python", "pipeline/train_model.py", "my-bucket", "0.5", "2"],
process_five = run(
["python", "-m", "pipeline.train_model", "my-bucket", "0.5", "2"],
capture_output=True,
encoding="utf-8"
)
assert process_three.returncode != 0
assert "ERROR" in process_three.stdout
assert process_five.returncode != 0
assert "ERROR" in process_five.stdout
assert "Invalid arguments passed to train_model.py" in process_five.stdout

0 comments on commit a6192e9

Please sign in to comment.