From 05c317f63cca79be6239f37e5582450dd116ee40 Mon Sep 17 00:00:00 2001
From: Cairry
Date: Sat, 1 Jun 2024 23:51:35 +0800
Subject: [PATCH] add mute type, Effective time.

---
 alert/mute/mute.go                     |  52 +++++++++++++++-
 alert/process/process.go               |   6 +-
 alert/process/prom.go                  | 115 +++++++++++++++++++++++++++++++++++
 alert/query/query.go                   |  30 +++------
 config/config.yaml                     |   2 +-
 internal/models/alert_current_event.go |   1 +
 internal/models/rule.go                |  46 ++++++++++----
 pkg/ctx/ctx.go                         |   2 +
 8 files changed, 217 insertions(+), 37 deletions(-)
 create mode 100644 alert/process/prom.go

diff --git a/alert/mute/mute.go b/alert/mute/mute.go
index 5406e89..c95e0cc 100644
--- a/alert/mute/mute.go
+++ b/alert/mute/mute.go
@@ -1,6 +1,7 @@
 package mute
 
 import (
+	"time"
 	models "watchAlert/internal/models"
 	"watchAlert/pkg/ctx"
 )
@@ -26,5 +27,54 @@ func IsMuted(ctx *ctx.Context, alert *models.AlertCurEvent) bool {
 		}
 	}
 
-	return false
+	return InTheEffectiveTime(alert)
 }
+
+// InTheEffectiveTime reports whether the alert must be muted because the
+// current local time falls OUTSIDE the rule's effective-time window. Despite
+// the name, true means "not effective now, mute it" and false means "effective,
+// deliver it". An empty Week list disables the check and yields false.
+func InTheEffectiveTime(alert *models.AlertCurEvent) bool {
+	return outsideEffectiveTime(alert.EffectiveTime, time.Now())
+}
+
+// outsideEffectiveTime is the clock-injected core of InTheEffectiveTime, so the
+// window logic can be exercised with a fixed time.
+func outsideEffectiveTime(et models.EffectiveTime, now time.Time) bool {
+	if len(et.Week) == 0 {
+		return false
+	}
+
+	// Mute on every weekday that is not listed in the window.
+	today := currentWeekday(now)
+	dayMatched := false
+	for _, wd := range et.Week {
+		if wd == today {
+			dayMatched = true
+			break
+		}
+	}
+	if !dayMatched {
+		return true
+	}
+
+	cts := currentTimeSeconds(now)
+	if et.StartTime <= et.EndTime {
+		return cts < et.StartTime || cts > et.EndTime
+	}
+	// StartTime > EndTime describes a window that wraps past midnight (for
+	// example 22:00-06:00); the plain range test would mute it permanently.
+	return cts < et.StartTime && cts > et.EndTime
+}
+
+// currentWeekday returns the English weekday name of ct ("Monday".."Sunday"),
+// which is the form compared against EffectiveTime.Week entries.
+func currentWeekday(ct time.Time) string {
+	return ct.Weekday().String()
+}
+
+// currentTimeSeconds converts the hour and minute of ct into seconds since
+// midnight; the seconds component of ct is not included.
+func currentTimeSeconds(ct time.Time) int {
+	return ct.Hour()*3600 + ct.Minute()*60
+}
diff --git a/alert/process/process.go b/alert/process/process.go
index 7e11db2..c7f6d04 100644
--- a/alert/process/process.go
+++ b/alert/process/process.go
@@ -54,15 +54,15 @@ func ParserDefaultEvent(rule models.AlertRule) models.AlertCurEvent {
 		DatasourceType:       rule.DatasourceType,
 		RuleId:               rule.RuleId,
 		RuleName:             rule.RuleName,
-		Severity:             rule.Severity,
 		Labels:               rule.Labels,
 		EvalInterval:         rule.EvalInterval,
-		ForDuration:          rule.ForDuration,
+		ForDuration:          rule.PrometheusConfig.ForDuration,
 		NoticeId:             rule.NoticeId,
 		NoticeGroup:          rule.NoticeGroup,
 		IsRecovered:          false,
 		RepeatNoticeInterval: rule.RepeatNoticeInterval,
 		DutyUser:             "暂无", // 默认暂无值班人员, 渲染模版时会实际判断 Notice 是否存在值班人员
+		EffectiveTime:        rule.EffectiveTime,
 	}
 
 	return event
@@ -70,6 +70,8 @@ func ParserDefaultEvent(rule models.AlertRule) models.AlertCurEvent {
 }
 
 func SaveEventCache(ctx *ctx.Context, event models.AlertCurEvent) {
+	ctx.Lock()
+	defer ctx.Unlock()
 	firingKey := event.GetFiringAlertCacheKey()
 	pendingKey := event.GetPendingAlertCacheKey()
 
diff --git a/alert/process/prom.go b/alert/process/prom.go
new file mode 100644
index 0000000..0c0766b
--- /dev/null
+++ b/alert/process/prom.go
@@ -0,0 +1,115 @@
+package process
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"watchAlert/internal/global"
+	"watchAlert/internal/models"
+	"watchAlert/pkg/client"
+	"watchAlert/pkg/ctx"
+	"watchAlert/pkg/utils/cmd"
+)
+
+// ruleExprPattern splits a threshold expression such as ">80", "<= 0.5" or
+// "!=-1" into its comparison operator and numeric threshold. It is compiled
+// once at package scope so that it is not rebuilt for every expression.
+var ruleExprPattern = regexp.MustCompile(`^\s*(>=|<=|!=|==|=|>|<)\s*([-+]?\d+(?:\.\d+)?)\s*$`)
+
+// ThresholdRule is a PrometheusConfig rule whose expression has already been
+// split into a comparison operator and a numeric threshold.
+type ThresholdRule struct {
+	Operator  string
+	Threshold float64
+	Severity  string
+}
+
+// ParseRuleExpr splits expr into the operator and threshold understood by
+// CalIndicatorValue. It returns an error, rather than panicking, when expr is
+// empty or is not of the form "<operator><number>".
+func ParseRuleExpr(expr string) (string, float64, error) {
+	matches := ruleExprPattern.FindStringSubmatch(expr)
+	if matches == nil {
+		return "", 0, fmt.Errorf("invalid rule expression %q", expr)
+	}
+
+	threshold, err := strconv.ParseFloat(matches[2], 64)
+	if err != nil {
+		return "", 0, fmt.Errorf("parsing threshold of %q: %w", expr, err)
+	}
+
+	return matches[1], threshold, nil
+}
+
+// ParseThresholdRules parses every rule expression once, ahead of the
+// per-sample loop. Malformed rules are logged and skipped so that one bad
+// expression does not stop the remaining rules from being evaluated.
+func ParseThresholdRules(rules []models.Rules) []ThresholdRule {
+	parsed := make([]ThresholdRule, 0, len(rules))
+	for _, r := range rules {
+		operator, threshold, err := ParseRuleExpr(r.Expr)
+		if err != nil {
+			global.Logger.Sugar().Error(err.Error())
+			continue
+		}
+		parsed = append(parsed, ThresholdRule{Operator: operator, Threshold: threshold, Severity: r.Severity})
+	}
+
+	return parsed
+}
+
+// CalIndicatorValue compares the sample v.Value with Threshold using operator m
+// (">", ">=", "<", "<=", "=", "==" or "!=") and, when the comparison holds,
+// records an alert event of the given severity. Surrounding whitespace in m is
+// ignored; an unknown operator never matches.
+func CalIndicatorValue(ctx *ctx.Context, m string, Threshold float64, rule models.AlertRule, v client.Vector, datasourceId string, curFiringKeys, curPendingKeys *[]string, severity string) {
+	if !thresholdExceeded(strings.TrimSpace(m), v.Value, Threshold) {
+		return
+	}
+
+	recordAlertEvent(ctx, datasourceId, curFiringKeys, curPendingKeys, v, rule, severity)
+}
+
+// thresholdExceeded reports whether value satisfies "value <operator> threshold".
+func thresholdExceeded(operator string, value, threshold float64) bool {
+	switch operator {
+	case ">":
+		return value > threshold
+	case ">=":
+		return value >= threshold
+	case "<":
+		return value < threshold
+	case "<=":
+		return value <= threshold
+	case "=", "==":
+		return value == threshold
+	case "!=":
+		return value != threshold
+	default:
+		return false
+	}
+}
+
+// recordAlertEvent builds the event for sample v, appends its firing and pending
+// cache keys to the caller's key lists, and saves the event to the cache when
+// GetRuleIsExist reports that the rule is still present.
+func recordAlertEvent(ctx *ctx.Context, datasourceId string, curFiringKeys, curPendingKeys *[]string, v client.Vector, rule models.AlertRule, severity string) {
+	event := ParserDefaultEvent(rule)
+	event.DatasourceId = datasourceId
+	event.Fingerprint = v.GetFingerprint()
+	event.Metric = v.GetMetric()
+	// NOTE(review): assumes GetMetric never returns a nil map; a nil map would
+	// panic on the write below. Confirm against client.Vector.
+	event.Metric["severity"] = severity
+	event.Severity = severity
+	event.Annotations = cmd.ParserVariables(rule.PrometheusConfig.Annotations, event.Metric)
+
+	*curFiringKeys = append(*curFiringKeys, event.GetFiringAlertCacheKey())
+	*curPendingKeys = append(*curPendingKeys, event.GetPendingAlertCacheKey())
+
+	if ctx.DB.Rule().GetRuleIsExist(event.RuleId) {
+		SaveEventCache(ctx, event)
+	}
+}
diff --git a/alert/query/query.go b/alert/query/query.go
index 995f8b0..e778907 100644
--- a/alert/query/query.go
+++ b/alert/query/query.go
@@ -8,7 +8,6 @@ import (
 	models "watchAlert/internal/models"
 	"watchAlert/pkg/client"
 	"watchAlert/pkg/ctx"
-	"watchAlert/pkg/utils/cmd"
 )
 
 type RuleQuery struct {
@@ -81,11 +80,15 @@ func (rq *RuleQuery) alertRecover(rule models.AlertRule, curKeys []string) {
 
 // Prometheus 数据源
 func (rq *RuleQuery) prometheus(datasourceId string, rule models.AlertRule) {
-	var curFiringKeys, curPendingKeys []string
+	var (
+		curFiringKeys  = &[]string{}
+		curPendingKeys = &[]string{}
+	)
+
 	defer func() {
-		go process.GcPendingCache(rq.ctx, rule, curPendingKeys)
-		rq.alertRecover(rule, curFiringKeys)
-		go process.GcRecoverWaitCache(rule, curFiringKeys)
+		go process.GcPendingCache(rq.ctx, rule, *curPendingKeys)
+		rq.alertRecover(rule, *curFiringKeys)
+		go process.GcRecoverWaitCache(rule, *curFiringKeys)
 	}()
 
 	r := models.DatasourceQuery{
@@ -109,20 +112,9 @@ func (rq *RuleQuery) prometheus(datasourceId string, rule models.AlertRule) {
 	}
 
+	thresholdRules := process.ParseThresholdRules(rule.PrometheusConfig.Rules)
 	for _, v := range resQuery {
-		event := process.ParserDefaultEvent(rule)
-		event.DatasourceId = datasourceId
-		event.Fingerprint = v.GetFingerprint()
-		event.Metric = v.GetMetric()
-		event.Annotations = cmd.ParserVariables(rule.Annotations, event.Metric)
-
-		firingKey := event.GetFiringAlertCacheKey()
-		pendingKey := event.GetPendingAlertCacheKey()
-		curFiringKeys = append(curFiringKeys, firingKey)
-		curPendingKeys = append(curPendingKeys, pendingKey)
-
-		ok := rq.ctx.DB.Rule().GetRuleIsExist(event.RuleId)
-		if ok {
-			process.SaveEventCache(rq.ctx, event)
+		for _, tr := range thresholdRules {
+			process.CalIndicatorValue(rq.ctx, tr.Operator, tr.Threshold, rule, v, datasourceId, curFiringKeys, curPendingKeys, tr.Severity)
 		}
 	}
 
diff --git a/config/config.yaml b/config/config.yaml
index d619518..4d2d77f 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -14,7 +14,7 @@ MySQL:
   host: 127.0.0.1
   port: 3306
   user: root
-  pass: semaik1023
+  pass: w8t.123
   dbName: watchalert
   timeout: 10s
 
diff --git a/internal/models/alert_current_event.go b/internal/models/alert_current_event.go
index b78a075..b29df74 100644
--- a/internal/models/alert_current_event.go
+++ b/internal/models/alert_current_event.go
@@ -29,6 +29,7 @@ type AlertCurEvent struct {
 	RecoverTime          int64         `json:"recover_time" gorm:"-"` // 恢复时间
 	RecoverTimeFormat    string        `json:"recover_time_format" gorm:"-"`
 	DutyUser             string        `json:"duty_user" gorm:"-"`
+	EffectiveTime        EffectiveTime `json:"effectiveTime" gorm:"effectiveTime;serializer:json"`
 }
 
 type AlertCurEventQuery struct {
diff --git a/internal/models/rule.go b/internal/models/rule.go
index b42c8af..46da402 100644
--- a/internal/models/rule.go
+++ b/internal/models/rule.go
@@ -13,19 +13,17 @@ type NoticeGroup []map[string]string
 
 type AlertRule struct {
 	//gorm.Model
-	TenantId             string    `json:"tenantId"`
-	RuleId               string    `json:"ruleId" gorm:"ruleId"`
-	RuleGroupId          string    `json:"ruleGroupId"`
-	DatasourceType       string    `json:"datasourceType"`
-	DatasourceIdList     []string  `json:"datasourceId" gorm:"datasourceId;serializer:json"`
-	RuleName             string    `json:"ruleName"`
-	EvalInterval         int64     `json:"evalInterval"`
-	ForDuration          int64     `json:"forDuration"`
-	RepeatNoticeInterval int64     `json:"repeatNoticeInterval"`
-	Description          string    `json:"description"`
-	Annotations          string    `json:"annotations"`
-	Labels               LabelsMap `json:"labels" gorm:"labels;serializer:json"`
-	Severity             string    `json:"severity"`
+	TenantId             string        `json:"tenantId"`
+	RuleId               string        `json:"ruleId" gorm:"ruleId"`
+	RuleGroupId          string        `json:"ruleGroupId"`
+	DatasourceType       string        `json:"datasourceType"`
+	DatasourceIdList     []string      `json:"datasourceId" gorm:"datasourceId;serializer:json"`
+	RuleName             string        `json:"ruleName"`
+	EvalInterval         int64         `json:"evalInterval"`
+	RepeatNoticeInterval int64         `json:"repeatNoticeInterval"`
+	Description          string        `json:"description"`
+	Labels               LabelsMap     `json:"labels" gorm:"labels;serializer:json"`
+	EffectiveTime        EffectiveTime `json:"effectiveTime" gorm:"effectiveTime;serializer:json"`
 
 	// Prometheus
 	PrometheusConfig PrometheusConfig `json:"prometheusConfig" gorm:"prometheusConfig;serializer:json"`
@@ -51,7 +49,27 @@ type JaegerConfig struct {
 }
 
 type PrometheusConfig struct {
-	PromQL string `json:"promQL"`
+	PromQL      string  `json:"promQL"`
+	Annotations string  `json:"annotations"`
+	ForDuration int64   `json:"forDuration"`
+	Rules       []Rules `json:"rules"`
+}
+
+// Rules is a single threshold rule of a Prometheus alert: Expr holds the
+// comparison applied to the queried value (for example ">80") and Severity is
+// the level assigned to the event when the comparison holds.
+type Rules struct {
+	Severity string `json:"severity"`
+	Expr     string `json:"expr"`
+}
+
+// EffectiveTime is the weekly window in which a rule is allowed to alert. Week
+// lists weekday names as produced by time.Weekday.String ("Monday"); StartTime
+// and EndTime are offsets from midnight in seconds, both inclusive.
+type EffectiveTime struct {
+	Week      []string `json:"week"`
+	StartTime int      `json:"startTime"`
+	EndTime   int      `json:"endTime"`
 }
 
 type AliCloudSLSConfig struct {
diff --git a/pkg/ctx/ctx.go b/pkg/ctx/ctx.go
index 55b9aff..93c337f 100644
--- a/pkg/ctx/ctx.go
+++ b/pkg/ctx/ctx.go
@@ -2,6 +2,7 @@ package ctx
 
 import (
 	"context"
+	"sync"
 	"watchAlert/internal/cache"
 	"watchAlert/internal/repo"
 )
@@ -10,6 +11,7 @@ type Context struct {
 	DB    repo.InterEntryRepo
 	Redis cache.InterEntryCache
 	Ctx   context.Context
+	sync.RWMutex
 }
 
 var (