diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index c3ff88d0c8ac..1f1f3e359353 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -116,6 +116,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Fix unintended skip in metric collection on Azure Monitor {issue}37204[37204] {pull}37203[37203] - Fix the "api-version query parameter (?api-version=) is required for all requests" error in Azure Billing. {pull}37158[37158] - Add memory hard limit from container metadata and remove usage percentage in AWS Fargate. {pull}37194[37194] +- Ignore parser errors from unsupported metrics types on Prometheus client and continue parsing until EOF is reached {pull}37383[37383] *Osquerybeat* diff --git a/metricbeat/helper/openmetrics/openmetrics.go b/metricbeat/helper/openmetrics/openmetrics.go index f4714c56f74a..5a0afaf2dba4 100644 --- a/metricbeat/helper/openmetrics/openmetrics.go +++ b/metricbeat/helper/openmetrics/openmetrics.go @@ -107,7 +107,7 @@ func (p *openmetrics) GetFamilies() ([]*prometheus.MetricFamily, error) { if err != nil { return nil, fmt.Errorf("failed to read response: %w", err) } - families, err := prometheus.ParseMetricFamilies(b, contentType, appendTime) + families, err := prometheus.ParseMetricFamilies(b, contentType, appendTime, p.logger) if err != nil { return nil, fmt.Errorf("failed to parse families: %w", err) } diff --git a/metricbeat/helper/prometheus/prometheus.go b/metricbeat/helper/prometheus/prometheus.go index 0cfab2dafe28..686bd97b9c48 100644 --- a/metricbeat/helper/prometheus/prometheus.go +++ b/metricbeat/helper/prometheus/prometheus.go @@ -105,7 +105,7 @@ func (p *prometheus) GetFamilies() ([]*MetricFamily, error) { if err != nil { return nil, fmt.Errorf("failed to read response: %w", err) } - families, err := ParseMetricFamilies(b, contentType, appendTime) + families, err := ParseMetricFamilies(b, contentType, appendTime, p.logger) if err != nil { return nil, fmt.Errorf("failed to parse families: %w", err) } diff --git a/metricbeat/helper/prometheus/prometheus_test.go b/metricbeat/helper/prometheus/prometheus_test.go index b135f0683d6a..9f7fc8a34a42 100644 --- a/metricbeat/helper/prometheus/prometheus_test.go +++ b/metricbeat/helper/prometheus/prometheus_test.go @@ -63,6 +63,14 @@ histogram_decimal_metric_bucket{le="+Inf"} 5 histogram_decimal_metric_sum 4.31 histogram_decimal_metric_count 5 +` + + promInfoMetrics = ` +# TYPE target info +target_info 1 +# TYPE first_metric gauge +first_metric{label1="value1",label2="value2",label3="Value3",label4="FOO"} 1 + ` promGaugeKeyLabel = ` @@ -530,6 +538,69 @@ func TestPrometheus(t *testing.T) { } } +// NOTE: if the content type = text/plain prometheus doesn't support Info metrics +// with the current implementation, info metrics should just be ignored and all other metrics +// correctly processed +func TestInfoMetricPrometheus(t *testing.T) { + + p := &prometheus{mockFetcher{response: promInfoMetrics}, logp.NewLogger("test")} + + tests := []struct { + mapping *MetricsMapping + msg string + expected []mapstr.M + }{ + { + msg: "Ignore metrics not in mapping", + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "first_metric": Metric("first.metric"), + }, + }, + expected: []mapstr.M{ + mapstr.M{ + "first": mapstr.M{ + "metric": 1.0, + }, + }, + }, + }, + { + msg: "Ignore metric in mapping but of unsupported type (eg. Info metric)", + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "first_metric": Metric("first.metric"), + "target_info": Metric("target.info"), + }, + }, + expected: []mapstr.M{ + mapstr.M{ + "first": mapstr.M{ + "metric": 1.0, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.msg, func(t *testing.T) { + reporter := &mbtest.CapturingReporterV2{} + _ = p.ReportProcessedMetrics(test.mapping, reporter) + assert.Nil(t, reporter.GetErrors(), test.msg) + // Sort slice to avoid randomness + res := reporter.GetEvents() + sort.Slice(res, func(i, j int) bool { + return res[i].MetricSetFields.String() < res[j].MetricSetFields.String() + }) + assert.Equal(t, len(test.expected), len(res)) + for j, ev := range res { + assert.Equal(t, test.expected[j], ev.MetricSetFields, test.msg) + } + }) + } +} + func TestPrometheusKeyLabels(t *testing.T) { testCases := []struct { diff --git a/metricbeat/helper/prometheus/textparse.go b/metricbeat/helper/prometheus/textparse.go index 4ce573ca6abd..4dca85c3aa5a 100644 --- a/metricbeat/helper/prometheus/textparse.go +++ b/metricbeat/helper/prometheus/textparse.go @@ -18,6 +18,8 @@ package prometheus import ( + "errors" + "io" "math" "mime" "net/http" @@ -30,6 +32,8 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/textparse" "github.com/prometheus/prometheus/pkg/timestamp" + + "github.com/elastic/elastic-agent-libs/logp" ) const ( @@ -476,7 +480,7 @@ func histogramMetricName(name string, s float64, qv string, lbls string, t *int6 return name, metric } -func ParseMetricFamilies(b []byte, contentType string, ts time.Time) ([]*MetricFamily, error) { +func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *logp.Logger) ([]*MetricFamily, error) { var ( parser = textparse.New(b, contentType) defTime = timestamp.FromTime(ts) @@ -495,8 +499,24 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time) ([]*MetricF e exemplar.Exemplar ) if et, err = parser.Next(); err != nil { - // TODO: log here - // if errors.Is(err, io.EOF) {} + if strings.HasPrefix(err.Error(), "invalid metric type") { + logger.Debugf("Ignored invalid metric type : %v ", err) + + // NOTE: ignore any errors that are not EOF. This is to avoid breaking the parsing. + // if acceptHeader in the prometheus client is `Accept: text/plain; version=0.0.4` (like it is now) + // any `info` metrics are not supported, and then there will be ignored here. + // if acceptHeader in the prometheus client `Accept: application/openmetrics-text; version=0.0.1` + // any `info` metrics are supported, and then there will be parsed here. + continue + } + + if errors.Is(err, io.EOF) { + break + } + if strings.HasPrefix(err.Error(), "data does not end with # EOF") { + break + } + logger.Debugf("Error while parsing metrics: %v ", err) break } switch et { diff --git a/metricbeat/helper/prometheus/textparse_test.go b/metricbeat/helper/prometheus/textparse_test.go index 59b6b8ce1999..cd76e14691f1 100644 --- a/metricbeat/helper/prometheus/textparse_test.go +++ b/metricbeat/helper/prometheus/textparse_test.go @@ -23,6 +23,8 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/logp" ) func stringp(x string) *string { @@ -86,7 +88,7 @@ metric_without_suffix 10 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -135,7 +137,7 @@ process_cpu 20 }, } - result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now()) + result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat) } @@ -189,7 +191,7 @@ second_metric 0 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -242,7 +244,7 @@ second_metric 0 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), ContentTypeTextFormat, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), ContentTypeTextFormat, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat) } @@ -292,7 +294,60 @@ metric_without_suffix 3 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) + if err != nil { + t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) + } + require.ElementsMatch(t, expected, result) +} + +func TestInfoPrometheus(t *testing.T) { + input := ` +# TYPE target info +target_info 1 +# TYPE first_metric gauge +first_metric{label1="value1"} 1 +# EOF +` + expected := []*MetricFamily{ + { + Name: stringp("target_info"), + Help: nil, + Type: "unknown", + Unit: nil, + Metric: []*OpenMetric{ + { + Label: []*labels.Label{}, + Name: stringp("target_info"), + Unknown: &Unknown{ + Value: float64p(1), + }, + }, + }, + }, + { + Name: stringp("first_metric"), + Help: nil, + Type: "gauge", + Unit: nil, + Metric: []*OpenMetric{ + { + Label: []*labels.Label{ + { + Name: "label1", + Value: "value1", + }, + }, + Name: stringp("first_metric"), + Gauge: &Gauge{ + Value: float64p(1), + }, + }, + }, + }, + } + + result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), logp.NewLogger("test")) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -342,7 +397,7 @@ a{a="foo"} 1.0 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -395,7 +450,7 @@ summary_metric_impossible 123 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -447,7 +502,7 @@ summary_metric_impossible 123 }, } - result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now()) + result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat) } @@ -507,7 +562,7 @@ http_server_requests_seconds_created{exception="None",uri="/actuator/prometheus" }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -566,7 +621,7 @@ http_server_requests_seconds_created{exception="None",uri="/actuator/prometheus" }, } - result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now()) + result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat) } @@ -609,7 +664,7 @@ ggh 99 }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -644,7 +699,7 @@ redis_connected_clients{instance="rough-snowflake-web"} 10.0 }, }, } - result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now()) + result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType) } @@ -678,7 +733,7 @@ redis_connected_clients{instance="rough-snowflake-web"} 10.0` }, }, } - result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now()) + result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil) if err != nil { t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat) }