diff --git a/metrics/otel_metrics.go b/metrics/otel_metrics.go index d97728de5f..898d3f7e4b 100644 --- a/metrics/otel_metrics.go +++ b/metrics/otel_metrics.go @@ -190,69 +190,31 @@ func (o *OTelMetrics) Register(metadata Metadata) { o.lock.Lock() defer o.lock.Unlock() - ctx := context.Background() - - unit := string(metadata.Unit) switch metadata.Type { case Counter: - ctr, err := o.meter.Int64Counter(metadata.Name, - metric.WithUnit(unit), - metric.WithDescription(metadata.Description), - ) + _, err := o.initCounter(metadata) if err != nil { - o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create counter") + o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create counter. %s", err.Error()) return } - - // Give the counter an initial value of 0 so that OTel will send it - ctr.Add(ctx, 0) - o.counters[metadata.Name] = ctr case Gauge: - var f metric.Float64Callback = func(_ context.Context, result metric.Float64Observer) error { - // this callback is invoked from outside this function call, so we - // need to Rlock when we read the values map. We don't know how long - // Observe() takes, so we make a copy of the value and unlock before - // calling Observe. - o.lock.RLock() - v := o.values[metadata.Name] - o.lock.RUnlock() - - result.Observe(v) - return nil - } - g, err := o.meter.Float64ObservableGauge(metadata.Name, - metric.WithUnit(unit), - metric.WithDescription(metadata.Description), - metric.WithFloat64Callback(f), - ) + _, err := o.initGauge(metadata) if err != nil { - o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create gauge") + o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create gauge. %s", err.Error()) return } - - o.gauges[metadata.Name] = g case Histogram: - h, err := o.meter.Float64Histogram(metadata.Name, - metric.WithUnit(unit), - metric.WithDescription(metadata.Description), - ) + _, err := o.initHistogram(metadata) if err != nil { - o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create histogram") + o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create histogram. %s", err.Error()) return } - h.Record(ctx, 0) - o.histograms[metadata.Name] = h case UpDown: - ud, err := o.meter.Int64UpDownCounter(metadata.Name, - metric.WithUnit(unit), - metric.WithDescription(metadata.Description), - ) + _, err := o.initUpDown(metadata) if err != nil { - o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create updown counter") + o.Logger.Error().WithString("name", metadata.Name).Logf("failed to create updown counter. %s", err.Error()) return } - ud.Add(ctx, 0) - o.updowns[metadata.Name] = ud default: o.Logger.Error().WithString("type", metadata.Type.String()).Logf("unknown metric type") return @@ -263,16 +225,34 @@ func (o *OTelMetrics) Increment(name string) { o.lock.Lock() defer o.lock.Unlock() - if ctr, ok := o.counters[name]; ok { - ctr.Add(context.Background(), 1) - o.values[name]++ + var err error + ctr, ok := o.counters[name] + if !ok { + ctr, err = o.initCounter(Metadata{ + Name: name, + }) + + if err != nil { + return + } } + ctr.Add(context.Background(), 1) + o.values[name]++ } func (o *OTelMetrics) Gauge(name string, val interface{}) { o.lock.Lock() defer o.lock.Unlock() + if _, ok := o.gauges[name]; !ok { + _, err := o.initGauge(Metadata{ + Name: name, + }) + + if err != nil { + return + } + } o.values[name] = ConvertNumeric(val) } @@ -280,42 +260,66 @@ func (o *OTelMetrics) Count(name string, val interface{}) { o.lock.Lock() defer o.lock.Unlock() - if ctr, ok := o.counters[name]; ok { - f := ConvertNumeric(val) - ctr.Add(context.Background(), int64(f)) - o.values[name] += f + var err error + ctr, ok := o.counters[name] + if !ok { + ctr, err = o.initCounter(Metadata{Name: name}) + if err != nil { + return + } } + f := ConvertNumeric(val) + ctr.Add(context.Background(), int64(f)) + o.values[name] += f } func (o *OTelMetrics) Histogram(name string, val interface{}) { o.lock.Lock() defer o.lock.Unlock() - if h, ok := o.histograms[name]; ok { - f := ConvertNumeric(val) - h.Record(context.Background(), f) - o.values[name] += f + var err error + h, ok := o.histograms[name] + if !ok { + h, err = o.initHistogram(Metadata{Name: name}) + if err != nil { + return + } } + f := ConvertNumeric(val) + h.Record(context.Background(), f) + o.values[name] += f } func (o *OTelMetrics) Up(name string) { o.lock.Lock() defer o.lock.Unlock() - if ud, ok := o.updowns[name]; ok { - ud.Add(context.Background(), 1) - o.values[name]++ + var err error + ud, ok := o.updowns[name] + if !ok { + ud, err = o.initUpDown(Metadata{Name: name}) + if err != nil { + return + } } + ud.Add(context.Background(), 1) + o.values[name]++ } func (o *OTelMetrics) Down(name string) { o.lock.Lock() defer o.lock.Unlock() - if ud, ok := o.updowns[name]; ok { - ud.Add(context.Background(), -1) - o.values[name]-- + var err error + ud, ok := o.updowns[name] + if !ok { + ud, err = o.initUpDown(Metadata{Name: name}) + if err != nil { + return + } } + ud.Add(context.Background(), -1) + o.values[name]-- } func (o *OTelMetrics) Store(name string, value float64) { @@ -332,3 +336,87 @@ func (o *OTelMetrics) Get(name string) (float64, bool) { val, ok := o.values[name] return val, ok } + +// initCounter initializes a new counter metric with the given metadata +// It should be used while holding the metrics lock. +func (o *OTelMetrics) initCounter(metadata Metadata) (metric.Int64Counter, error) { + unit := string(metadata.Unit) + ctr, err := o.meter.Int64Counter(metadata.Name, + metric.WithUnit(unit), + metric.WithDescription(metadata.Description), + ) + if err != nil { + return nil, err + } + + // Give the counter an initial value of 0 so that OTel will send it + ctr.Add(context.Background(), 0) + o.counters[metadata.Name] = ctr + + return ctr, nil +} + +// initGauge initializes a new gauge metric with the given metadata +// It should be used while holding the metrics lock. +func (o *OTelMetrics) initGauge(metadata Metadata) (metric.Float64ObservableGauge, error) { + unit := string(metadata.Unit) + var f metric.Float64Callback = func(_ context.Context, result metric.Float64Observer) error { + // this callback is invoked from outside this function call, so we + // need to Rlock when we read the values map. We don't know how long + // Observe() takes, so we make a copy of the value and unlock before + // calling Observe. + o.lock.RLock() + v := o.values[metadata.Name] + o.lock.RUnlock() + + result.Observe(v) + return nil + } + g, err := o.meter.Float64ObservableGauge(metadata.Name, + metric.WithUnit(unit), + metric.WithDescription(metadata.Description), + metric.WithFloat64Callback(f), + ) + if err != nil { + return nil, err + } + + o.gauges[metadata.Name] = g + return g, nil +} + +// initHistogram initializes a new histogram metric with the given metadata +// It should be used while holding the metrics lock. +func (o *OTelMetrics) initHistogram(metadata Metadata) (metric.Float64Histogram, error) { + unit := string(metadata.Unit) + h, err := o.meter.Float64Histogram(metadata.Name, + metric.WithUnit(unit), + metric.WithDescription(metadata.Description), + ) + if err != nil { + return nil, err + } + h.Record(context.Background(), 0) + o.histograms[metadata.Name] = h + + return h, nil + +} + +// initUpDown initializes a new updown counter metric with the given metadata +// It should be used while holding the metrics lock. +func (o *OTelMetrics) initUpDown(metadata Metadata) (metric.Int64UpDownCounter, error) { + unit := string(metadata.Unit) + ud, err := o.meter.Int64UpDownCounter(metadata.Name, + metric.WithUnit(unit), + metric.WithDescription(metadata.Description), + ) + if err != nil { + return nil, err + } + ud.Add(context.Background(), 0) + o.updowns[metadata.Name] = ud + + return ud, nil + +} diff --git a/route/route.go b/route/route.go index 75e202e056..ff88051420 100644 --- a/route/route.go +++ b/route/route.go @@ -128,6 +128,10 @@ func (r *Router) SetVersion(ver string) { var routerMetrics = []metrics.Metadata{ {Name: "_router_proxied", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of events proxied to another refinery"}, {Name: "_router_event", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of events received"}, + {Name: "_router_span", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of spans received"}, + {Name: "_router_dropped", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of events dropped because the channel was full"}, + {Name: "_router_nonspan", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of non-span events received"}, + {Name: "_router_peer", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "the number of spans proxied to a peer"}, } // LnS spins up the Listen and Serve portion of the router. A router is