Skip to content

Commit

Permalink
ignore prometheus parsing errors (#37383) (#37547)
Browse files Browse the repository at this point in the history
* ignore prometheus parsing errors

(cherry picked from commit 6f13899)

Co-authored-by: Giuseppe Santoro <[email protected]>
  • Loading branch information
mergify[bot] and gsantoro authored Jan 5, 2024
1 parent 246adc2 commit 7d5284a
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 18 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Fix EC2 host.cpu.usage {pull}35717[35717]
- Add option in SQL module to execute queries for all dbs. {pull}35688[35688]
- Add remaining dimensions for azure storage account to make them available for tsdb enablement. {pull}36331[36331]
- Add missing 'TransactionType' dimension for Azure Storage Account. {pull}36413[36413]
- Add log error when statsd server fails to start {pull}36477[36477]
- Fix CassandraConnectionClosures metric configuration {pull}34742[34742]
- Fix event mapping implementation for statsd module {pull}36925[36925]
- The region and availability_zone ecs fields nested within the cloud field. {pull}37015[37015]
- Fix CPU and memory metrics collection from privileged process on Windows {issue}17314[17314]{pull}37027[37027]
- Enhanced Azure Metrics metricset with refined grouping logic and resolved duplication issues for TSDB compatibility {pull}36823[36823]
- Fix memory leak on Windows {issue}37142[37142] {pull}37171[37171]
- Fix unintended skip in metric collection on Azure Monitor {issue}37204[37204] {pull}37203[37203]
- Fix the "api-version query parameter (?api-version=) is required for all requests" error in Azure Billing. {pull}37158[37158]
- Add memory hard limit from container metadata and remove usage percentage in AWS Fargate. {pull}37194[37194]
- Ignore parser errors from unsupported metrics types on Prometheus client and continue parsing until EOF is reached {pull}37383[37383]

*Osquerybeat*

Expand Down
2 changes: 1 addition & 1 deletion metricbeat/helper/openmetrics/openmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (p *openmetrics) GetFamilies() ([]*prometheus.MetricFamily, error) {
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
families, err := prometheus.ParseMetricFamilies(b, contentType, appendTime)
families, err := prometheus.ParseMetricFamilies(b, contentType, appendTime, p.logger)
if err != nil {
return nil, fmt.Errorf("failed to parse families: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion metricbeat/helper/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func (p *prometheus) GetFamilies() ([]*MetricFamily, error) {
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
families, err := ParseMetricFamilies(b, contentType, appendTime)
families, err := ParseMetricFamilies(b, contentType, appendTime, p.logger)
if err != nil {
return nil, fmt.Errorf("failed to parse families: %w", err)
}
Expand Down
71 changes: 71 additions & 0 deletions metricbeat/helper/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ histogram_decimal_metric_bucket{le="+Inf"} 5
histogram_decimal_metric_sum 4.31
histogram_decimal_metric_count 5
`

promInfoMetrics = `
# TYPE target info
target_info 1
# TYPE first_metric gauge
first_metric{label1="value1",label2="value2",label3="Value3",label4="FOO"} 1
`

promGaugeKeyLabel = `
Expand Down Expand Up @@ -530,6 +538,69 @@ func TestPrometheus(t *testing.T) {
}
}

// NOTE: if the content type = text/plain prometheus doesn't support Info metrics
// with the current implementation, info metrics should just be ignored and all other metrics
// correctly processed
func TestInfoMetricPrometheus(t *testing.T) {

p := &prometheus{mockFetcher{response: promInfoMetrics}, logp.NewLogger("test")}

tests := []struct {
mapping *MetricsMapping
msg string
expected []mapstr.M
}{
{
msg: "Ignore metrics not in mapping",
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"first_metric": Metric("first.metric"),
},
},
expected: []mapstr.M{
mapstr.M{
"first": mapstr.M{
"metric": 1.0,
},
},
},
},
{
msg: "Ignore metric in mapping but of unsupported type (eg. Info metric)",
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"first_metric": Metric("first.metric"),
"target_info": Metric("target.info"),
},
},
expected: []mapstr.M{
mapstr.M{
"first": mapstr.M{
"metric": 1.0,
},
},
},
},
}

for _, test := range tests {
t.Run(test.msg, func(t *testing.T) {
reporter := &mbtest.CapturingReporterV2{}
_ = p.ReportProcessedMetrics(test.mapping, reporter)
assert.Nil(t, reporter.GetErrors(), test.msg)
// Sort slice to avoid randomness
res := reporter.GetEvents()
sort.Slice(res, func(i, j int) bool {
return res[i].MetricSetFields.String() < res[j].MetricSetFields.String()
})
assert.Equal(t, len(test.expected), len(res))
for j, ev := range res {
assert.Equal(t, test.expected[j], ev.MetricSetFields, test.msg)
}
})
}
}

func TestPrometheusKeyLabels(t *testing.T) {

testCases := []struct {
Expand Down
26 changes: 23 additions & 3 deletions metricbeat/helper/prometheus/textparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
package prometheus

import (
"errors"
"io"
"math"
"mime"
"net/http"
Expand All @@ -30,6 +32,8 @@ import (
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/textparse"
"github.com/prometheus/prometheus/pkg/timestamp"

"github.com/elastic/elastic-agent-libs/logp"
)

const (
Expand Down Expand Up @@ -476,7 +480,7 @@ func histogramMetricName(name string, s float64, qv string, lbls string, t *int6
return name, metric
}

func ParseMetricFamilies(b []byte, contentType string, ts time.Time) ([]*MetricFamily, error) {
func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *logp.Logger) ([]*MetricFamily, error) {
var (
parser = textparse.New(b, contentType)
defTime = timestamp.FromTime(ts)
Expand All @@ -495,8 +499,24 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time) ([]*MetricF
e exemplar.Exemplar
)
if et, err = parser.Next(); err != nil {
// TODO: log here
// if errors.Is(err, io.EOF) {}
if strings.HasPrefix(err.Error(), "invalid metric type") {
logger.Debugf("Ignored invalid metric type : %v ", err)

// NOTE: ignore any errors that are not EOF. This is to avoid breaking the parsing.
// if acceptHeader in the prometheus client is `Accept: text/plain; version=0.0.4` (like it is now)
// any `info` metrics are not supported, and then there will be ignored here.
// if acceptHeader in the prometheus client `Accept: application/openmetrics-text; version=0.0.1`
// any `info` metrics are supported, and then there will be parsed here.
continue
}

if errors.Is(err, io.EOF) {
break
}
if strings.HasPrefix(err.Error(), "data does not end with # EOF") {
break
}
logger.Debugf("Error while parsing metrics: %v ", err)
break
}
switch et {
Expand Down
81 changes: 68 additions & 13 deletions metricbeat/helper/prometheus/textparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (

"github.com/prometheus/prometheus/pkg/labels"
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-libs/logp"
)

func stringp(x string) *string {
Expand Down Expand Up @@ -86,7 +88,7 @@ metric_without_suffix 10
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -135,7 +137,7 @@ process_cpu 20
},
}

result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now())
result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat)
}
Expand Down Expand Up @@ -189,7 +191,7 @@ second_metric 0
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -242,7 +244,7 @@ second_metric 0
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), ContentTypeTextFormat, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat)
}
Expand Down Expand Up @@ -292,7 +294,60 @@ metric_without_suffix 3
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
require.ElementsMatch(t, expected, result)
}

func TestInfoPrometheus(t *testing.T) {
input := `
# TYPE target info
target_info 1
# TYPE first_metric gauge
first_metric{label1="value1"} 1
# EOF
`
expected := []*MetricFamily{
{
Name: stringp("target_info"),
Help: nil,
Type: "unknown",
Unit: nil,
Metric: []*OpenMetric{
{
Label: []*labels.Label{},
Name: stringp("target_info"),
Unknown: &Unknown{
Value: float64p(1),
},
},
},
},
{
Name: stringp("first_metric"),
Help: nil,
Type: "gauge",
Unit: nil,
Metric: []*OpenMetric{
{
Label: []*labels.Label{
{
Name: "label1",
Value: "value1",
},
},
Name: stringp("first_metric"),
Gauge: &Gauge{
Value: float64p(1),
},
},
},
},
}

result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), logp.NewLogger("test"))
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -342,7 +397,7 @@ a{a="foo"} 1.0
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -395,7 +450,7 @@ summary_metric_impossible 123
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -447,7 +502,7 @@ summary_metric_impossible 123
},
}

result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now())
result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat)
}
Expand Down Expand Up @@ -507,7 +562,7 @@ http_server_requests_seconds_created{exception="None",uri="/actuator/prometheus"
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -566,7 +621,7 @@ http_server_requests_seconds_created{exception="None",uri="/actuator/prometheus"
},
}

result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now())
result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat)
}
Expand Down Expand Up @@ -609,7 +664,7 @@ ggh 99
},
}

result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -644,7 +699,7 @@ redis_connected_clients{instance="rough-snowflake-web"} 10.0
},
},
}
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now())
result, err := ParseMetricFamilies([]byte(input[1:]), OpenMetricsType, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", OpenMetricsType)
}
Expand Down Expand Up @@ -678,7 +733,7 @@ redis_connected_clients{instance="rough-snowflake-web"} 10.0`
},
},
}
result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now())
result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error.", ContentTypeTextFormat)
}
Expand Down

0 comments on commit 7d5284a

Please sign in to comment.