Skip to content

Commit

Permalink
Merge pull request #77 from andrewm4894/add-alert_snooze_n-param
Browse files Browse the repository at this point in the history
Add ability to snooze alerts
  • Loading branch information
andrewm4894 authored Dec 20, 2023
2 parents aba8636 + 0a25297 commit 5184b3b
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ Here is a list of features of Anomstack (emoji alert warning!)
14. 🔔 - Scores & Alerts saved to database so you can query them and do whatever you want with them.
15. 🏷️ - Add custom metric tags for more complex alert routing e.g. priority or subject area based.
16. 🔄 - Change detection jobs out of the box.
17. 😴 - Ability to snooze alerts for a period of time to reduce repeated and duplicate alerts.

### Architecture

Expand Down
1 change: 1 addition & 0 deletions metrics/defaults/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ alert_metric_timestamp_max_days_ago: 45 # don't alert on metrics older than this
change_metric_timestamp_max_days_ago: 45 # don't allow metrics older than this into change detection.
alert_recent_n: 1 # only alert on recent n so as to avoid continually alerting.
alert_smooth_n: 3 # smooth anomaly score over rolling n to avoid being too trigger happy.
alert_snooze_n: 3 # snooze alerts for n periods after an alert.
change_smooth_n: 1 # smooth metric values as part of change detection.
alert_threshold: 0.8 # threshold for smoothed anomaly score above which to alert on.
change_threshold: 3.5 # threshold for PyOD MAD based change detection above which to alert on.
Expand Down
45 changes: 41 additions & 4 deletions metrics/defaults/sql/alerts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,25 @@ where
group by 1,2,3
),

metric_alert_data as
(
    -- Previously recorded alerts for this batch: one row per
    -- (metric_timestamp, metric_batch, metric_name), where metric_alert is the
    -- largest metric_value stored with metric_type = 'alert'.
    select
        metric_timestamp,
        metric_batch,
        metric_name,
        max(metric_value) as metric_alert
    from {{ table_key }}
    where metric_batch = '{{ metric_batch }}'
        and metric_type = 'alert'
        -- keep only rows inside the alert_metric_timestamp_max_days_ago lookback window
        and cast(metric_timestamp as datetime) >= CURRENT_DATE - INTERVAL '{{ alert_metric_timestamp_max_days_ago }}' DAY
    -- grouping by every non-aggregated column already yields unique rows,
    -- so no separate distinct is needed
    group by
        metric_timestamp,
        metric_batch,
        metric_name
),

metric_score_recency_ranked as
(
select distinct
Expand Down Expand Up @@ -75,7 +94,8 @@ select
m.metric_value,
s.metric_score,
m.metric_value_recency_rank,
s.metric_score_recency_rank
s.metric_score_recency_rank,
a.metric_alert
from
metric_value_recency_ranked m
left outer join
Expand All @@ -86,6 +106,14 @@ on
m.metric_batch = s.metric_batch
and
m.metric_timestamp = s.metric_timestamp
left outer join
metric_alert_data a
on
m.metric_name = a.metric_name
and
m.metric_batch = a.metric_batch
and
m.metric_timestamp = a.metric_timestamp
),

data_smoothed as
Expand All @@ -99,7 +127,9 @@ select
metric_value_recency_rank,
metric_score_recency_rank,
-- smooth the metric score over the last {{ alert_smooth_n }} values
avg(metric_score) over (partition by metric_name order by metric_score_recency_rank rows between {{ alert_smooth_n }} preceding and current row) as metric_score_smooth
avg(metric_score) over (partition by metric_name order by metric_score_recency_rank rows between {{ alert_smooth_n }} preceding and current row) as metric_score_smooth,
-- add a window function to check for previous alerts within the last {{ alert_snooze_n }} values
max(metric_alert) over (partition by metric_name order by metric_score_recency_rank rows between {{ alert_snooze_n }} preceding and current row) as metric_has_recent_alert
from
data_ranked
),
Expand All @@ -113,6 +143,7 @@ select
metric_value,
metric_score,
metric_score_smooth,
metric_has_recent_alert,
-- only alert on the most recent {{ alert_recent_n }} values
case when metric_score_recency_rank <= {{ alert_recent_n }} and (metric_score_smooth >= {{ alert_threshold }} or {{ alert_always }}=True ) then 1 else 0 end as metric_alert
from
Expand All @@ -127,11 +158,17 @@ metrics_triggered as
select
metric_batch,
metric_name,
max(metric_has_recent_alert) as metric_has_recent_alert_tmp,
max(metric_alert) as metric_alert_tmp
from
data_alerts
group by 1,2
having max(metric_alert) = 1
having
-- only return metrics that have been triggered
max(metric_alert) = 1
and
-- respect the snooze period
max(metric_has_recent_alert) = 0
)

select
Expand All @@ -144,7 +181,7 @@ select
metric_alert
from
data_alerts
-- only return metrics that have been triggered
-- only return metrics that have been triggered and are not snoozed
join
metrics_triggered
on
Expand Down

0 comments on commit 5184b3b

Please sign in to comment.