Issue 36 long term reading variance algorithm (#46)
* Fix: fixed separated test cases

* Feat: added 1/2 responses testcase

* Feat: test cases and tests added

* feat: built longterm variance detection

* Fix: removed pasted tests

* Fix: added no anomaly testcase

* Fix: edited noanom testcase

* Fix: testcases

* Fix: removed query source from testcase

* feat: added make target and fn resource

* Fix: added third response to test cases

* Fix: changed data size check to account for both previous years

* Fix: removed underscore from testcases folder

* Fix: testcase file reference

* passing: fixed test cases

* fix: removed extraneous print statement

* fix: correct StuckTwoDay target

* added python_lib to pipfile

* fix: low_delta_t anomaly message

* missing setup.py

* .gitignore egg-info

* added longTermVariance algorithm to cron handler

---------

Co-authored-by: logo0303 <[email protected]>
stevemandl and logo0303 authored Oct 23, 2024
1 parent 0ae6354 commit a9dc7d6
Showing 19 changed files with 18,097 additions and 634 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -5,4 +5,5 @@ coverage
 .build
 *.pyc
 .pytest_cache
-.aws-sam
\ No newline at end of file
+.aws-sam
+*.egg-info/
4 changes: 2 additions & 2 deletions py_src/Makefile
@@ -1,6 +1,6 @@
 SHELL := /bin/bash
-build-SparseData build-LowDeltaT StuckTwoDay:
+build-SparseData build-LowDeltaT build-StuckTwoDay build-LongTermVariance:
 	cp -r ./* $(ARTIFACTS_DIR)
 	pip install --upgrade pip
 	pip install pipenv
-	pipenv run pip install -r <(pipenv requirements) -t $(ARTIFACTS_DIR)
\ No newline at end of file
+	pipenv run pip install -r <(pipenv requirements) -t $(ARTIFACTS_DIR)
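(These `build-<Function>` targets presumably follow the AWS SAM makefile build convention: `sam build` invokes the target named after each function resource and expects its artifacts in `$(ARTIFACTS_DIR)`, so adding `build-LongTermVariance` is what makes the new lambda buildable. The matching template change from the "added make target and fn resource" commit is among the files not rendered here.)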
1 change: 1 addition & 0 deletions py_src/Pipfile
@@ -11,6 +11,7 @@ numpy = "*"
 pandas = "*"
 matplotlib = "*"
 tk = "*"
+pythonlib = {file = "python_lib"}
 
 [dev-packages]
 pytest = {version=">=6.2"}
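(The new `pythonlib` entry is presumably a local path dependency on the repo's shared `python_lib` package — hence the `setup.py` and `*.egg-info` commits above — so that pipenv vendors it into the bundle and `handler.py` can import `python_lib.utils`.)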
1,356 changes: 730 additions & 626 deletions py_src/Pipfile.lock

Large diffs are not rendered by default.

92 changes: 92 additions & 0 deletions py_src/long_term_variance/handler.py
@@ -0,0 +1,92 @@
"""
long_term_variance/handler.py
"""

from datetime import timedelta
import numpy as np
import pandas as pd
from requests.exceptions import ConnectionError as RequestConnectionError, HTTPError
from python_lib.utils import parse_event, fetch_trends, build_index, build_df, AnomalyError

# the window size we are analyzing. The assumption is that consumption is
# normally similar across this period and for the same period one year prior
RECENT_DAYS = 7
# the minimum acceptable length of the datapoints array
MIN_DATAPOINTS_LENGTH = int(RECENT_DAYS * 24 * 0.6)
# if the magnitude of the z score is greater than this, it's anomalous
Z_SCORE_THRESHOLD = 3.0
ANOMALY_PORTION = 0.5
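# Worked example (assuming hourly datapoints, which is not stated in this diff):
# RECENT_DAYS = 7 gives MIN_DATAPOINTS_LENGTH = int(7 * 24 * 0.6) = 100, i.e. at
# least 60% of the 168 expected readings per window; with 8 three-hour bins,
# ANOMALY_PORTION = 0.5 means at least 5 bins must breach Z_SCORE_THRESHOLD.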

def run(event, _context):
    """
    handler.run - the lambda function entry point
    """
    # start out with a blank response
    response = {"statusCode": 200, "body": ""}
    # parse event and ensure timeStamp and pointName are present
    params = parse_event(event)
    print(f"long_term_variance run with params {params}")

    now = params.get("timeStamp")
    year_ago = now - timedelta(365)
    two_years_ago = now - timedelta(365 * 2)
    start_time = now - timedelta(RECENT_DAYS)
    one_year_st = year_ago - timedelta(RECENT_DAYS)
    two_year_st = two_years_ago - timedelta(RECENT_DAYS)
    point_name = params.get("pointName")
    try:
        # fetch the recent window, plus the same window one and two years prior
        df = build_df(fetch_trends(
            point=point_name, start_time=start_time, end_time=now))
        year1 = fetch_trends(
            point=point_name, start_time=one_year_st, end_time=year_ago)
        year2 = fetch_trends(
            point=point_name, start_time=two_year_st, end_time=two_years_ago)
        a = np.array(year1[0]["datapoints"] + year2[0]["datapoints"])
        if (a.size < MIN_DATAPOINTS_LENGTH * 2) or (df.size < MIN_DATAPOINTS_LENGTH):
            # not enough data to determine an anomaly
            return response
        t_df = pd.DataFrame.from_records(a, columns=("previous", "ts"), index="ts")
        t_df.index = pd.to_datetime(t_df.index, unit="ms")
        df = df.merge(t_df, how="outer", on="ts").interpolate()
        # add a column separating the time of day into 8 three-hour bins
        df["hour"] = df.index.hour
        hourbins = np.array(range(0, 24, 3))
        df["hourbin"] = np.digitize(df["hour"], bins=hourbins)
        # compute the mean and std of the prior years, and a z score for the recent data
        result = df.groupby(["hourbin"], as_index=False).agg(
            {"previous": ["mean", "std"], point_name: ["mean"]})
        result.columns = ["bin", "prev_mean", "prev_std", "curr_mean"]
        result["z"] = (result["curr_mean"] - result["prev_mean"]) / result["prev_std"]
        anomaly_count = result.loc[lambda x: np.abs(x["z"]) > Z_SCORE_THRESHOLD]["z"].size
        # report an anomaly if the z score magnitude breaches the threshold in
        # more than ANOMALY_PORTION of the bins
        if anomaly_count > (hourbins.size * ANOMALY_PORTION):
            raise AnomalyError(
                f"z score over {Z_SCORE_THRESHOLD} for more than {ANOMALY_PORTION:.0%} "
                f"of bins over the past {RECENT_DAYS} days relative to the past 2 years")
    except RequestConnectionError as err:
        response[
            "body"
        ] = f"""{point_name} ConnectionError:
{err.response.text} {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}"""
    except AnomalyError as err:
        response[
            "body"
        ] = f"""{point_name} Anomaly Detected: {err} {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}"""
    except HTTPError as err:
        response[
            "body"
        ] = f"""{point_name} {err.response.text}
for the period {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}"""
        if err.response.status_code == 400:
            try:  # have to try decoding the json body
                if err.response.json()["error"] == "No data":
                    response[
                        "body"
                    ] = f"""{point_name} has no data
for the period {start_time:%Y-%m-%d %H:%M} to {now:%Y-%m-%d %H:%M}"""
            except ValueError:
                pass
    return response
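As orientation for the handler above, a minimal local invocation sketch — not part of the commit; it assumes `python_lib` is importable and the trends service is reachable, and borrows its event shape and point name from the spec below:

from long_term_variance.handler import run

event = {
    "body": {
        "pointName": "BartonHall.CW.FP/TONS",  # point name taken from the spec's test cases
        "timeStamp": "2023-07-30",
    }
}
result = run(event, None)
print(result["statusCode"], result["body"])  # body is "" when no anomaly is detected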
163 changes: 163 additions & 0 deletions py_src/long_term_variance/longterm_spec.py
@@ -0,0 +1,163 @@
from long_term_variance.handler import run
from requests.exceptions import HTTPError
from requests.models import Response
import json


anom_3 = open('long_term_variance/test_cases/one_month_oldest.json')
"""
one_month_oldest.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2019-8-19",
"to": "2019-9-18"
},
"targets": [
{
"target": "CarpenterHall.CW.FP/TONS"
}
]
}
"""
anom_2 = open('long_term_variance/test_cases/one_month_old.json')
"""
one_month_old.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2020-8-19",
"to": "2020-9-18"
},
"targets": [
{
"target": "CarpenterHall.CW.FP/TONS"
}
]
}
"""

anom_1 = open('long_term_variance/test_cases/one_month_new.json')
"""
one_month_new.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2021-8-19",
"to": "2021-9-18"
},
"targets": [
{
"target": "CarpenterHall.CW.FP/TONS"
}
]
}
"""
noanom_3 = open('long_term_variance/test_cases/noanom_oldest.json')
"""
noanom_oldest.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2021-07-01",
"to": "2021-07-30"
},
"targets": [
{
"target": "BartonHall.CW.FP/TONS"
}
]
}
"""
noanom_2 = open('long_term_variance/test_cases/noanom_old.json')
"""
noanom_old.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2022-07-01",
"to": "2022-07-30"
},
"targets": [
{
"target": "BartonHall.CW.FP/TONS"
}
]
}
"""
noanom_1 = open('long_term_variance/test_cases/noanom_new.json')
"""
noanom_new.json is a file containing the swagger JSON response from the following query
{
"range": {
"from": "2023-07-01",
"to": "2023-07-30"
},
"targets": [
{
"target": "BartonHall.CW.FP/TONS"
}
]
}
"""

data_noanom_1 = json.load(noanom_1)
data_noanom_2 = json.load(noanom_2)
data_noanom_3 = json.load(noanom_3)
data_anom_1 = json.load(anom_1)
data_anom_2 = json.load(anom_2)
data_anom_3 = json.load(anom_3)
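
# note: each side_effect list below must follow the handler's fetch order:
# the recent window first, then one year prior, then two years prior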


def test_onetime(mocker):
    event = {
        "body": {
            "pointName": "BartonHall.CW.FP/TONS",
            "timeStamp": "2023-07-30",
        }
    }
    # data1 stands in for the previous year and is empty,
    # simulating a point with insufficient history
    data1 = [{"datapoints": []}]
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_noanom_1, data1, data_noanom_3]
    )
    result = run(event, None)
    assert "statusCode" in result
    # if not enough data is present to be conclusive,
    # we expect the algorithm to return no anomaly
    assert "" == result.get("body")

def test_anom(mocker):
    event = {
        "body": {
            "pointName": "CarpenterHall.CW.FP/TONS",
            "timeStamp": "2021-9-18",
        }
    }
    # data_anom_1 is the recent data, data_anom_2 the previous year,
    # and data_anom_3 the data from two years prior
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_anom_1, data_anom_2, data_anom_3]
    )
    result = run(event, None)
    assert "statusCode" in result
    assert "Anomaly Detected" in result.get("body")

def test_noanom(mocker):
    event = {
        "body": {
            "pointName": "BartonHall.CW.FP/TONS",
            "timeStamp": "2023-07-30",
        }
    }
    # data_noanom_1 is the recent data, data_noanom_2 the previous year,
    # and data_noanom_3 the data from two years prior
    mocker.patch(
        "long_term_variance.handler.fetch_trends",
        side_effect=[data_noanom_1, data_noanom_2, data_noanom_3]
    )
    result = run(event, None)
    assert "statusCode" in result
    assert "" == result.get("body")
