Skip to content

Commit

Permalink
Merge pull request #34 from Cairry/master
Browse files Browse the repository at this point in the history
1. Add mute type and effective time; 2. Update the Prometheus rule configuration.
  • Loading branch information
Cairry authored Jun 1, 2024
2 parents 7671c5c + a33816d commit 0dda300
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 35 deletions.
52 changes: 52 additions & 0 deletions alert/mute/mute.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package mute

import (
"time"
"watchAlert/internal/global"
models "watchAlert/internal/models"
"watchAlert/pkg/ctx"
)
Expand All @@ -26,5 +28,55 @@ func IsMuted(ctx *ctx.Context, alert *models.AlertCurEvent) bool {
}
}

return InTheEffectiveTime(alert)
}

// InTheEffectiveTime reports whether alert must be muted because the current
// local time falls outside the effective time configured on it.
//
// Note the polarity: despite its name, the function returns true when "now" is
// NOT inside the effective time (the alert is muted) and false when the alert
// may go out.
//
//   - No weekdays configured: no restriction applies, returns false.
//   - Today is not listed in EffectiveTime.Week: returns true.
//   - Today is listed: returns true only when the time of day lies outside the
//     inclusive window [StartTime, EndTime], both expressed in seconds since
//     local midnight.
//
// A window whose StartTime is greater than its EndTime is treated as wrapping
// past midnight (for example 22:00-06:00). A plain range check can never be
// satisfied for such a window, which would mute the alert for the whole day.
func InTheEffectiveTime(alert *models.AlertCurEvent) bool {
	et := alert.EffectiveTime
	if len(et.Week) == 0 {
		return false
	}

	// Take a single timestamp so the weekday and the time of day agree.
	now := time.Now()

	today := currentWeekday(now)
	dayMatches := false
	for _, wd := range et.Week {
		if wd == today {
			dayMatches = true
			break // one match is enough
		}
	}
	if !dayMatches {
		return true
	}

	return !withinDailyWindow(currentTimeSeconds(now), et.StartTime, et.EndTime)
}

// withinDailyWindow reports whether sec lies in the inclusive window
// [start, end]; when start > end the window wraps past midnight.
func withinDailyWindow(sec, start, end int) bool {
	if start <= end {
		return start <= sec && sec <= end
	}
	return sec >= start || sec <= end
}

// currentWeekday returns the English name ("Monday" ... "Sunday") of the
// weekday on which ct falls.
//
// The instant is first reduced to its calendar date by formatting it as
// "2006-01-02" and parsing that string back. Should parsing fail, the error is
// logged and an empty string is returned.
func currentWeekday(ct time.Time) string {
	const dateLayout = "2006-01-02"

	// Keep only the calendar date, dropping the time-of-day part.
	day, err := time.Parse(dateLayout, ct.Format(dateLayout))
	if err == nil {
		return day.Weekday().String()
	}

	global.Logger.Sugar().Error(err.Error())
	return ""
}

// currentTimeSeconds converts the wall-clock time of ct into seconds elapsed
// since midnight at minute resolution: only the hour and minute components
// contribute, the seconds component is ignored.
func currentTimeSeconds(ct time.Time) int {
	const (
		secondsPerMinute = 60
		secondsPerHour   = 3600
	)
	return ct.Minute()*secondsPerMinute + ct.Hour()*secondsPerHour
}
6 changes: 4 additions & 2 deletions alert/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,24 @@ func ParserDefaultEvent(rule models.AlertRule) models.AlertCurEvent {
DatasourceType: rule.DatasourceType,
RuleId: rule.RuleId,
RuleName: rule.RuleName,
Severity: rule.Severity,
Labels: rule.Labels,
EvalInterval: rule.EvalInterval,
ForDuration: rule.ForDuration,
ForDuration: rule.PrometheusConfig.ForDuration,
NoticeId: rule.NoticeId,
NoticeGroup: rule.NoticeGroup,
IsRecovered: false,
RepeatNoticeInterval: rule.RepeatNoticeInterval,
DutyUser: "暂无", // 默认暂无值班人员, 渲染模版时会实际判断 Notice 是否存在值班人员
EffectiveTime: rule.EffectiveTime,
}

return event

}

func SaveEventCache(ctx *ctx.Context, event models.AlertCurEvent) {
ctx.Lock()
defer ctx.Unlock()

firingKey := event.GetFiringAlertCacheKey()
pendingKey := event.GetPendingAlertCacheKey()
Expand Down
58 changes: 58 additions & 0 deletions alert/process/prom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package process

import (
"watchAlert/internal/models"
"watchAlert/pkg/client"
"watchAlert/pkg/ctx"
"watchAlert/pkg/utils/cmd"
)

// CalIndicatorValue compares the sampled value v.Value with Threshold using
// the comparison operator m and, when the comparison holds, passes the sample
// on to f.
//
// Supported operators are ">", ">=", "<", "<=", "=" and "!=". The operator
// must match one of them exactly; any other string makes the call a no-op.
// All remaining arguments are forwarded to f unchanged.
func CalIndicatorValue(ctx *ctx.Context, m string, Threshold float64, rule models.AlertRule, v client.Vector, datasourceId string, curFiringKeys, curPendingKeys *[]string, severity string) {
	var triggered bool

	switch m {
	case ">":
		triggered = v.Value > Threshold
	case ">=":
		triggered = v.Value >= Threshold
	case "<":
		triggered = v.Value < Threshold
	case "<=":
		triggered = v.Value <= Threshold
	case "=":
		triggered = v.Value == Threshold
	case "!=":
		triggered = v.Value != Threshold
	default:
		// Unknown operator: nothing to evaluate.
		return
	}

	if triggered {
		f(ctx, datasourceId, curFiringKeys, curPendingKeys, v, rule, severity)
	}
}

// f builds the alert event for one query sample and records it.
//
// The event starts from ParserDefaultEvent(rule) and is completed with the
// data source ID, the sample's fingerprint and metric, and the given severity
// (stored both under the "severity" metric key and in the Severity field).
// Annotations come from cmd.ParserVariables, called with
// rule.PrometheusConfig.Annotations and the metric.
//
// The event's firing and pending cache keys are always appended to
// *curFiringKeys and *curPendingKeys; the event itself is handed to
// SaveEventCache only when GetRuleIsExist returns true for its rule ID.
func f(ctx *ctx.Context, datasourceId string, curFiringKeys, curPendingKeys *[]string, v client.Vector, rule models.AlertRule, severity string) {
	evt := ParserDefaultEvent(rule)
	evt.DatasourceId = datasourceId
	evt.Fingerprint = v.GetFingerprint()

	// Write the severity entry first so ParserVariables receives a metric
	// that already contains it.
	evt.Metric = v.GetMetric()
	evt.Metric["severity"] = severity
	evt.Severity = severity
	evt.Annotations = cmd.ParserVariables(rule.PrometheusConfig.Annotations, evt.Metric)

	keyFiring := evt.GetFiringAlertCacheKey()
	keyPending := evt.GetPendingAlertCacheKey()
	*curFiringKeys = append(*curFiringKeys, keyFiring)
	*curPendingKeys = append(*curPendingKeys, keyPending)

	if ctx.DB.Rule().GetRuleIsExist(evt.RuleId) {
		SaveEventCache(ctx, evt)
	}
}
34 changes: 15 additions & 19 deletions alert/query/query.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package query

import (
"regexp"
"strconv"
"time"
"watchAlert/alert/process"
"watchAlert/alert/queue"
"watchAlert/internal/global"
models "watchAlert/internal/models"
"watchAlert/pkg/client"
"watchAlert/pkg/ctx"
"watchAlert/pkg/utils/cmd"
)

type RuleQuery struct {
Expand Down Expand Up @@ -81,11 +82,15 @@ func (rq *RuleQuery) alertRecover(rule models.AlertRule, curKeys []string) {

// Prometheus data source
func (rq *RuleQuery) prometheus(datasourceId string, rule models.AlertRule) {
var curFiringKeys, curPendingKeys []string
var (
curFiringKeys = &[]string{}
curPendingKeys = &[]string{}
)

defer func() {
go process.GcPendingCache(rq.ctx, rule, curPendingKeys)
rq.alertRecover(rule, curFiringKeys)
go process.GcRecoverWaitCache(rule, curFiringKeys)
go process.GcPendingCache(rq.ctx, rule, *curPendingKeys)
rq.alertRecover(rule, *curFiringKeys)
go process.GcRecoverWaitCache(rule, *curFiringKeys)
}()

r := models.DatasourceQuery{
Expand All @@ -109,20 +114,11 @@ func (rq *RuleQuery) prometheus(datasourceId string, rule models.AlertRule) {
}

for _, v := range resQuery {
event := process.ParserDefaultEvent(rule)
event.DatasourceId = datasourceId
event.Fingerprint = v.GetFingerprint()
event.Metric = v.GetMetric()
event.Annotations = cmd.ParserVariables(rule.Annotations, event.Metric)

firingKey := event.GetFiringAlertCacheKey()
pendingKey := event.GetPendingAlertCacheKey()
curFiringKeys = append(curFiringKeys, firingKey)
curPendingKeys = append(curPendingKeys, pendingKey)

ok := rq.ctx.DB.Rule().GetRuleIsExist(event.RuleId)
if ok {
process.SaveEventCache(rq.ctx, event)
for _, ruleExpr := range rule.PrometheusConfig.Rules {
re := regexp.MustCompile(`([^\d]+)(\d+)`)
matches := re.FindStringSubmatch(ruleExpr.Expr)
t, _ := strconv.ParseFloat(matches[2], 64)
process.CalIndicatorValue(rq.ctx, matches[1], t, rule, v, datasourceId, curFiringKeys, curPendingKeys, ruleExpr.Severity)
}
}

Expand Down
1 change: 1 addition & 0 deletions deploy/sql/tenants.sql
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
INSERT INTO `tenants` (`id`, `name`, `create_at`, `create_by`, `manager`, `description`, `user_number`, `rule_number`, `duty_number`, `notice_number`, `remove_protection`) VALUES ('tid-co4iic3adq7a2jjeas90', 'default', 1711876400, 'system', 'admin', 'default 租户', 999, 999, 999, 999, 1);

1 change: 1 addition & 0 deletions internal/models/alert_current_event.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type AlertCurEvent struct {
RecoverTime int64 `json:"recover_time" gorm:"-"` // 恢复时间
RecoverTimeFormat string `json:"recover_time_format" gorm:"-"`
DutyUser string `json:"duty_user" gorm:"-"`
EffectiveTime EffectiveTime `json:"effectiveTime" gorm:"effectiveTime;serializer:json"`
}

type AlertCurEventQuery struct {
Expand Down
40 changes: 26 additions & 14 deletions internal/models/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,17 @@ type NoticeGroup []map[string]string

type AlertRule struct {
//gorm.Model
TenantId string `json:"tenantId"`
RuleId string `json:"ruleId" gorm:"ruleId"`
RuleGroupId string `json:"ruleGroupId"`
DatasourceType string `json:"datasourceType"`
DatasourceIdList []string `json:"datasourceId" gorm:"datasourceId;serializer:json"`
RuleName string `json:"ruleName"`
EvalInterval int64 `json:"evalInterval"`
ForDuration int64 `json:"forDuration"`
RepeatNoticeInterval int64 `json:"repeatNoticeInterval"`
Description string `json:"description"`
Annotations string `json:"annotations"`
Labels LabelsMap `json:"labels" gorm:"labels;serializer:json"`
Severity string `json:"severity"`
TenantId string `json:"tenantId"`
RuleId string `json:"ruleId" gorm:"ruleId"`
RuleGroupId string `json:"ruleGroupId"`
DatasourceType string `json:"datasourceType"`
DatasourceIdList []string `json:"datasourceId" gorm:"datasourceId;serializer:json"`
RuleName string `json:"ruleName"`
EvalInterval int64 `json:"evalInterval"`
RepeatNoticeInterval int64 `json:"repeatNoticeInterval"`
Description string `json:"description"`
Labels LabelsMap `json:"labels" gorm:"labels;serializer:json"`
EffectiveTime EffectiveTime `json:"effectiveTime" gorm:"effectiveTime;serializer:json"`

// Prometheus
PrometheusConfig PrometheusConfig `json:"prometheusConfig" gorm:"prometheusConfig;serializer:json"`
Expand All @@ -51,7 +49,21 @@ type JaegerConfig struct {
}

type PrometheusConfig struct {
PromQL string `json:"promQL"`
PromQL string `json:"promQL"`
Annotations string `json:"annotations"`
ForDuration int64 `json:"forDuration"`
Rules []Rules `json:"rules"`
}

// Rules is one threshold condition of a Prometheus alert rule, pairing a
// comparison expression with the severity to report when it holds.
type Rules struct {
// Severity is forwarded to process.CalIndicatorValue and ends up as the
// severity of the event created for a matching sample.
Severity string `json:"severity"`
// Expr holds a comparison operator followed by a numeric threshold. The
// query code splits it with the pattern `([^\d]+)(\d+)`: the non-digit
// prefix is used as the operator and the digit run as the threshold.
Expr string `json:"expr"`
}

// EffectiveTime restricts the days and the time of day during which an alert
// is considered effective; mute.InTheEffectiveTime evaluates it against the
// current time.
type EffectiveTime struct {
// Week lists the weekdays on which the rule is effective. Entries are
// compared against time.Weekday.String(), i.e. English names such as
// "Monday". An empty list disables the restriction.
Week []string `json:"week"`
// StartTime is the start of the daily window; it is compared against
// hour*3600 + minute*60 of the current time.
StartTime int `json:"startTime"`
// EndTime is the end of the daily window, in the same unit as StartTime.
EndTime int `json:"endTime"`
}

type AliCloudSLSConfig struct {
Expand Down
2 changes: 2 additions & 0 deletions pkg/ctx/ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ctx

import (
"context"
"sync"
"watchAlert/internal/cache"
"watchAlert/internal/repo"
)
Expand All @@ -10,6 +11,7 @@ type Context struct {
DB repo.InterEntryRepo
Redis cache.InterEntryCache
Ctx context.Context
sync.RWMutex
}

var (
Expand Down

0 comments on commit 0dda300

Please sign in to comment.