Issue 36 long term reading variance algorithm (#46)
* Fix: fixed separated test cases
* Feat: added 1/2 responses testcase
* Feat: test cases and tests added
* feat: built longterm variance detection
* Fix: removed pasted tests
* Fix: added no anomaly testcase
* Fix: edited noanom testcase
* Fix: testcases
* Fix: removed query source from testcase
* feat: added make target and fn resource
* Fix: added third response to test cases
* Fix: changed data size check to account for both previous years
* Fix: removed underscore from testcases folder
* Fix: testcase file reference
* passing: fixed test cases
* fix: removed extraneous print statement
* fix: correct StuckTwoDay target
* added python_lib to pipfile
* fix: low_delta_t anomaly message
* missing setup.py
* .gitignore egg-info
* added longTermVariance algorithm to cron handler

---------

Co-authored-by: logo0303 <[email protected]>
1 parent 0ae6354 · commit a9dc7d6 · showing 19 changed files with 18,097 additions and 634 deletions.
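The new lambda consumes the same event shape as the repository's other anomaly detectors. A minimal invocation sketch, with field names and values taken from the tests at the bottom of this diff (the cron-handler change itself is presumably among the files not rendered on this page):

```python
# Hypothetical direct invocation of the new lambda entry point; the
# "body"/"pointName"/"timeStamp" field names match what parse_event and
# the tests in this commit expect.
from long_term_variance.handler import run

event = {
    "body": {
        "pointName": "CarpenterHall.CW.FP/TONS",  # trend point to analyze
        "timeStamp": "2021-9-18",                  # end of the one-week window
    }
}
response = run(event, None)
print(response)  # {"statusCode": 200, "body": ""} unless an anomaly is flagged
```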
.gitignore:

```diff
@@ -5,4 +5,5 @@ coverage
 .build
 *.pyc
 .pytest_cache
 .aws-sam
+*.egg-info/
```
Makefile:

```diff
@@ -1,6 +1,6 @@
 SHELL := /bin/bash
-build-SparseData build-LowDeltaT StuckTwoDay:
+build-SparseData build-LowDeltaT build-StuckTwoDay build-LongTermVariance:
 	cp -r ./* $(ARTIFACTS_DIR)
 	pip install --upgrade pip
 	pip install pipenv
 	pipenv run pip install -r <(pipenv requirements) -t $(ARTIFACTS_DIR)
```
Large diffs are not rendered by default.
long_term_variance/handler.py (new file, +92 lines):

```python
""" | ||
long_term_variance/handler.py | ||
""" | ||
|
||
from datetime import timedelta | ||
import numpy as np | ||
import pandas as pd | ||
from requests.exceptions import ConnectionError as RequestConnectionError, HTTPError | ||
from python_lib.utils import parse_event, fetch_trends, build_index, build_df, AnomalyError | ||
|
||
# the window size we are analyzing. The assumption is the consumption is | ||
# normally similar across this period and for the same period one year prior | ||
RECENT_DAYS = 7 | ||
# the minimum acceptable length of the datapoints array | ||
MIN_DATAPOINTS_LENGTH = int(RECENT_DAYS * 24 * 0.6) | ||
# if the magnitude of the z score is greater than this, it's anomolous | ||
Z_SCORE_THRESHOLD = 3.0 | ||
ANOMALY_PORTION = 0.5 | ||
|
||
def run(event, _context): | ||
""" | ||
handler.run - the lambda function entry point | ||
""" | ||
|
||
|
||
# start out with blank response | ||
response = {"statusCode": 200, "body": ""} | ||
# parse event and ensure timeStamp and pointName are present | ||
params = parse_event(event) | ||
print(f"long_term_variance run with params {params}") | ||
|
||
now = params.get("timeStamp") | ||
year_ago = now - timedelta(365) | ||
two_years_ago = now - timedelta(365*2) | ||
start_time = now - timedelta(RECENT_DAYS) | ||
one_year_st = year_ago - timedelta(RECENT_DAYS) | ||
two_year_st = two_years_ago - timedelta(RECENT_DAYS) | ||
point_name = params.get("pointName") | ||
try: | ||
# read recent data, prior year, and prior 2 year | ||
# fetch recent | ||
df = build_df(fetch_trends( | ||
point=point_name, start_time=start_time, end_time=now )) | ||
year1 = fetch_trends( | ||
point=point_name, start_time=one_year_st, end_time=year_ago ) | ||
year2 = fetch_trends( | ||
point=point_name, start_time=two_year_st, end_time=two_years_ago ) | ||
a = np.array(year1[0]["datapoints"] + year2[0]["datapoints"]) | ||
if (a.size < MIN_DATAPOINTS_LENGTH*2) or (df.size < MIN_DATAPOINTS_LENGTH): | ||
# not enough data to determine anomaly | ||
return response | ||
t_df = pd.DataFrame.from_records(a, columns = ("previous", "ts"), index="ts") | ||
t_df.index = pd.to_datetime(t_df.index, unit='ms' | ||
) | ||
df = df.merge(t_df, how="outer", on="ts").interpolate() | ||
# add column to separate time of day into 8 bins | ||
df["hour"] = df.index.hour | ||
# 8 three-hour bins to aggregate the data | ||
hourbins = np.array(range(0,24,3)) | ||
df["hourbin"] = np.digitize(df["hour"], bins= hourbins) | ||
# compute mean and std of prior, compute z score for recent | ||
result = df.groupby(["hourbin"], as_index=False).agg({"previous": ["mean", "std"], point_name: ["mean"]}) | ||
result.columns=['bin','prev_mean','prev_std','curr_mean'] | ||
result["z"]=(result["curr_mean"] - result["prev_mean"])/result["prev_std"] | ||
anomoly_count = result.loc[lambda x: np.abs(x["z"]) > Z_SCORE_THRESHOLD]["z"].size | ||
# return anomaly if magnitude of z score is greater than threshold | ||
if anomoly_count > (hourbins.size * ANOMALY_PORTION): | ||
raise(AnomalyError(f"z score over {Z_SCORE_THRESHOLD} for more than {ANOMALY_PORTION:.0%} over past {RECENT_DAYS} relative to past 2 years")) | ||
except RequestConnectionError as err: | ||
response[ | ||
"body" | ||
] = f"""{point_name} ConnectionError: | ||
{err.response.text} {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}""" | ||
except AnomalyError as err: | ||
response[ | ||
"body" | ||
] = f"""{point_name} Anomaly Detected: {err} {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}""" | ||
except HTTPError as err: | ||
response[ | ||
"body" | ||
] = f"""{point_name} {err.response.text} | ||
for the period {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}""" | ||
if err.response.status_code == 400: | ||
try: # have to try decoding json | ||
if err.response.json()["error"] == "No data": | ||
response[ | ||
"body" | ||
] = f"""{point_name} has no data | ||
for the period {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}""" | ||
except ValueError: | ||
pass | ||
return response |
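To see the detection step in isolation, here is a self-contained sketch of the three-hour binning and z-score comparison on synthetic data. The random baseline, the `point` column name, and the deliberate 6-sigma shift are illustrative assumptions, not part of the commit:

```python
# A self-contained sketch of the hourbin/z-score logic above, run on
# synthetic data instead of fetch_trends output (illustrative only).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
ts = pd.date_range("2023-07-23", periods=7 * 24, freq="h")
df = pd.DataFrame(
    {
        "previous": rng.normal(100, 5, ts.size),  # stand-in for the two prior years
        "point": rng.normal(130, 5, ts.size),     # recent week, shifted to force an anomaly
    },
    index=ts,
)

# 8 three-hour bins, exactly as in the handler
df["hourbin"] = np.digitize(df.index.hour, bins=np.arange(0, 24, 3))
result = df.groupby("hourbin", as_index=False).agg(
    {"previous": ["mean", "std"], "point": ["mean"]})
result.columns = ["bin", "prev_mean", "prev_std", "curr_mean"]
result["z"] = (result["curr_mean"] - result["prev_mean"]) / result["prev_std"]

# with a 6-sigma shift, every bin's |z| clears the 3.0 threshold, so all
# 8 of 8 bins count as anomalous -- well over the 50% ANOMALY_PORTION
print(result[["bin", "z"]])
```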
Tests for the long_term_variance handler (new file, +163 lines):

```python
from long_term_variance.handler import run
from requests.exceptions import HTTPError
from requests.models import Response
import json

anom_3 = open('long_term_variance/test_cases/one_month_oldest.json')
"""
one_month_oldest.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2019-8-19", "to": "2019-9-18"},
    "targets": [{"target": "CarpenterHall.CW.FP/TONS"}]
}
"""

anom_2 = open('long_term_variance/test_cases/one_month_old.json')
"""
one_month_old.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2020-8-19", "to": "2020-9-18"},
    "targets": [{"target": "CarpenterHall.CW.FP/TONS"}]
}
"""

anom_1 = open('long_term_variance/test_cases/one_month_new.json')
"""
one_month_new.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2021-8-19", "to": "2021-9-18"},
    "targets": [{"target": "CarpenterHall.CW.FP/TONS"}]
}
"""

noanom_3 = open('long_term_variance/test_cases/noanom_oldest.json')
"""
noanom_oldest.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2021-07-01", "to": "2021-07-30"},
    "targets": [{"target": "BartonHall.CW.FP/TONS"}]
}
"""

noanom_2 = open('long_term_variance/test_cases/noanom_old.json')
"""
noanom_old.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2022-07-01", "to": "2022-07-30"},
    "targets": [{"target": "BartonHall.CW.FP/TONS"}]
}
"""

noanom_1 = open('long_term_variance/test_cases/noanom_new.json')
"""
noanom_new.json is a file containing the swagger JSON response from the following query
{
    "range": {"from": "2023-07-01", "to": "2023-07-30"},
    "targets": [{"target": "BartonHall.CW.FP/TONS"}]
}
"""

data_noanom_1 = json.load(noanom_1)
data_noanom_2 = json.load(noanom_2)
data_noanom_3 = json.load(noanom_3)
data_anom_1 = json.load(anom_1)
data_anom_2 = json.load(anom_2)
data_anom_3 = json.load(anom_3)


def test_onetime(mocker):
    event = {
        "body": {
            "pointName": "BartonHall.CW.FP/TONS",
            "timeStamp": "2023-07-30",
        }
    }
    # fetch_trends is called three times: current data, the previous year,
    # and two years prior; here the previous year comes back empty
    data1 = [{"datapoints": []}]
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_noanom_1, data1, data_noanom_3],
    )
    result = run(event, None)
    assert "statusCode" in result
    # if not enough data is present to be conclusive,
    # we expect the algorithm to return no anomaly
    assert "" == result.get("body")


def test_anom(mocker):
    event = {
        "body": {
            "pointName": "CarpenterHall.CW.FP/TONS",
            "timeStamp": "2021-9-18",
        }
    }
    # side_effect order: current data, previous year, two years prior
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_anom_1, data_anom_2, data_anom_3],
    )
    result = run(event, None)
    assert "statusCode" in result
    assert "Anomaly Detected" in result.get("body")


def test_noanom(mocker):
    event = {
        "body": {
            "pointName": "BartonHall.CW.FP/TONS",
            "timeStamp": "2023-07-30",
        }
    }
    # side_effect order: current data, previous year, two years prior
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_noanom_1, data_noanom_2, data_noanom_3],
    )
    result = run(event, None)
    assert "statusCode" in result
    assert "" == result.get("body")
```