From 05b010c83575052c05607c95411b382dde7846da Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 29 Jan 2025 17:41:53 +0100 Subject: [PATCH] reworked knownmetrics to include namespace and metric kind and added internal metrics support Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 14 +- internal/newtelemetry/client.go | 53 +- internal/newtelemetry/client_config.go | 84 +- internal/newtelemetry/client_test.go | 41 +- internal/newtelemetry/distributions.go | 13 +- internal/newtelemetry/globalclient.go | 26 +- .../internal/knownmetrics/common_metrics.json | 1050 ++++++++++++++--- .../knownmetrics/generator/generator.go | 40 +- .../internal/knownmetrics/golang_metrics.json | 6 +- .../internal/knownmetrics/known_metrics.go | 32 +- internal/newtelemetry/internal/recorder.go | 16 +- .../internal/transport/namespace.go | 15 +- internal/newtelemetry/internal/writer.go | 4 +- internal/newtelemetry/metrics.go | 4 +- 14 files changed, 1118 insertions(+), 280 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 5d7616c277..ca7159befc 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -16,12 +16,14 @@ import ( type Namespace = transport.Namespace const ( - NamespaceGeneral = transport.NamespaceGeneral - NamespaceTracers = transport.NamespaceTracers - NamespaceProfilers = transport.NamespaceProfilers - NamespaceAppSec = transport.NamespaceAppSec - NamespaceIAST = transport.NamespaceIAST - NamespaceTelemetry = transport.NamespaceTelemetry + NamespaceGeneral = transport.NamespaceGeneral + NamespaceTracers = transport.NamespaceTracers + NamespaceProfilers = transport.NamespaceProfilers + NamespaceAppSec = transport.NamespaceAppSec + NamespaceIAST = transport.NamespaceIAST + NamespaceCIVisibility = transport.NamespaceCIVisibility + NamespaceMLOps = transport.NamespaceMLOps + NamespaceRUM = transport.NamespaceRUM ) // Origin describes the source of a configuration change diff --git 
a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 56b35cb610..8c1b879982 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -7,6 +7,8 @@ package newtelemetry import ( "errors" + "os" + "strconv" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" @@ -53,10 +55,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client writer: writer, clientConfig: config, flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), - // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. - // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. - // TODO: tweak this value once we get real telemetry data from the telemetry client - payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), + payloadQueue: internal.NewRingQueue[transport.Payload](config.PayloadQueueSize.Min, config.PayloadQueueSize.Max), dependencies: dependencies{ DependencyLoader: config.DependencyLoader, @@ -86,7 +85,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client client.flushTicker = internal.NewTicker(func() { client.Flush() - }, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + }, config.FlushInterval.Min, config.FlushInterval.Max) return client, nil } @@ -230,6 +229,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { } results, err := c.writer.Flush(payload) + c.computeFlushMetrics(results, err) if err != nil { // We stop flushing when we encounter a fatal error, put the payloads in the queue and return the error log.Error("error while flushing telemetry data: %v", err) @@ -264,6 +264,49 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return nbBytes, nil } +// computeFlushMetrics computes and submits the metrics for the flush operation using 
the output from the writer.Flush method.
+// It will submit the number of requests, responses, errors, the number of bytes sent and the duration of the call that was successful.
+func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, err error) {
+	if !c.clientConfig.InternalMetricsEnabled {
+		return
+	}
+
+	indexToEndpoint := func(i int) string {
+		if i == 0 {
+			return "agent"
+		}
+		return "agentless"
+	}
+
+	for i, result := range results {
+		c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", map[string]string{"endpoint": indexToEndpoint(i)}).Submit(1)
+		if result.StatusCode != 0 {
+			c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", map[string]string{"endpoint": indexToEndpoint(i), "status_code": strconv.Itoa(result.StatusCode)}).Submit(1)
+		}
+
+		if result.Error != nil {
+			typ := "network"
+			if os.IsTimeout(result.Error) {
+				typ = "timeout"
+			}
+			var writerStatusCodeError *internal.WriterStatusCodeError
+			if errors.As(result.Error, &writerStatusCodeError) {
+				typ = "status_code"
+			}
+			c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", map[string]string{"endpoint": indexToEndpoint(i), "type": typ}).Submit(1)
+		}
+	}
+
+	if err != nil || len(results) == 0 { // length guard: results[len(results)-1] below would panic on an empty slice
+		return
+	}
+
+	successfulCall := results[len(results)-1]
+	endpoint := indexToEndpoint(len(results) - 1)
+	c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.PayloadByteSize))
+	c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds()))
+}
+
 func (c *client) appStart() {
 	c.flushMapperMu.Lock()
 	defer c.flushMapperMu.Unlock()
diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go
index 7da0a5c2ee..bbee82d61a 100644
--- a/internal/newtelemetry/client_config.go
+++ b/internal/newtelemetry/client_config.go
@@ -17,6 +17,11 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" ) +type Range[T any] struct { + Min T + Max T +} + type ClientConfig struct { // DependencyLoader determines how dependency data is sent via telemetry. // If nil, the library should not send the app-dependencies-loaded event. @@ -24,7 +29,7 @@ type ClientConfig struct { // This can be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED DependencyLoader func() (*debug.BuildInfo, bool) - // MetricsEnabled etermines whether metrics are sent via telemetry. + // MetricsEnabled determines whether metrics are sent via telemetry. // If false, libraries should not send the generate-metrics or distributions events. // This can be controlled via the env var DD_TELEMETRY_METRICS_ENABLED MetricsEnabled bool @@ -33,6 +38,9 @@ type ClientConfig struct { // This can be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED LogsEnabled bool + // InternalMetricsEnabled determines whether client stats metrics are sent via telemetry. + InternalMetricsEnabled bool + // AgentlessURL is the full URL to the agentless telemetry endpoint. (optional) // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string @@ -51,13 +59,23 @@ type ClientConfig struct { // ExtendedHeartbeatInterval is the interval at which to send an extended heartbeat payload, defaults to 24h. ExtendedHeartbeatInterval time.Duration - // FlushIntervalRange is the interval at which the client flushes the data. + // FlushInterval is the interval at which the client flushes the data. // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. - FlushIntervalRange struct { - Min time.Duration - Max time.Duration - } + FlushInterval Range[time.Duration] + + // PayloadQueueSize is the size of the payload queue. 
+ // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. + // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + // Ideally both values should be power of 2 because of the way the ring queue is implemented as it's growing + PayloadQueueSize Range[int] + + // DistributionsSize is the size of the distribution queue. + // Default max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. + // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. + // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. + // Which will bring our max throughput to 1100 points per second or about 750µs per request. + DistributionsSize Range[int] // Debug enables debug mode for the telemetry clientt and sent it to the backend so it logs the request Debug bool @@ -71,7 +89,7 @@ type ClientConfig struct { EarlyFlushPayloadSize int } -const ( +var ( // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is // also the default URL in case connecting to the agent URL fails. agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" @@ -94,6 +112,18 @@ const ( // maxPayloadSize is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. 
 	maxPayloadSize = 5 * 1024 * 1024 // 5MB
+
+	// TODO: tweak this value once we get real telemetry data from the telemetry client
+	defaultPayloadQueueSize = Range[int]{
+		Min: 4,
+		Max: 32,
+	}
+
+	// TODO: tweak this value once we get telemetry data from the telemetry client
+	distributionsSize = Range[int]{
+		Min: 1 << 8,
+		Max: 1 << 14,
+	}
 )
 
 // clamp squeezes a value between a minimum and maximum value.
@@ -106,12 +136,12 @@ func (config ClientConfig) validateConfig() error {
 		return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval)
 	}
 
-	if config.FlushIntervalRange.Min > 60*time.Second || config.FlushIntervalRange.Max > 60*time.Second {
-		return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max)
+	if config.FlushInterval.Min > 60*time.Second || config.FlushInterval.Max > 60*time.Second {
+		return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max)
 	}
 
-	if config.FlushIntervalRange.Min > config.FlushIntervalRange.Max {
-		return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max)
+	if config.FlushInterval.Min > config.FlushInterval.Max {
+		return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max)
 	}
 
 	if config.EarlyFlushPayloadSize > maxPayloadSize || config.EarlyFlushPayloadSize <= 0 {
@@ -137,16 +167,16 @@ func defaultConfig(config ClientConfig) ClientConfig {
 		config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second)
 	}
 
-	if config.FlushIntervalRange.Min == 0 {
-		config.FlushIntervalRange.Min = defaultMinFlushInterval
+	if config.FlushInterval.Min == 0 {
+		config.FlushInterval.Min = defaultMinFlushInterval
 	} else {
-		
config.FlushIntervalRange.Min = clamp(config.FlushIntervalRange.Min, time.Microsecond, 60*time.Second)
+		config.FlushInterval.Min = clamp(config.FlushInterval.Min, time.Microsecond, 60*time.Second)
 	}
 
-	if config.FlushIntervalRange.Max == 0 {
-		config.FlushIntervalRange.Max = defaultMaxFlushInterval
+	if config.FlushInterval.Max == 0 {
+		config.FlushInterval.Max = defaultMaxFlushInterval
 	} else {
-		config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second)
+		config.FlushInterval.Max = clamp(config.FlushInterval.Max, time.Microsecond, 60*time.Second)
 	}
 
 	if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) {
@@ -161,6 +191,10 @@ func defaultConfig(config ClientConfig) ClientConfig {
 		config.LogsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true)
 	}
 
+	// Internal metrics default to enabled. A plain bool cannot express an explicit
+	// opt-out here (false is indistinguishable from unset), so set it unconditionally.
+	config.InternalMetricsEnabled = true
+
 	if config.EarlyFlushPayloadSize == 0 {
 		config.EarlyFlushPayloadSize = defaultEarlyFlushPayloadSize
 	}
@@ -169,6 +203,22 @@
 		config.ExtendedHeartbeatInterval = defaultExtendedHeartbeatInterval
 	}
 
+	if config.PayloadQueueSize.Min == 0 {
+		config.PayloadQueueSize.Min = defaultPayloadQueueSize.Min
+	}
+
+	if config.PayloadQueueSize.Max == 0 {
+		config.PayloadQueueSize.Max = defaultPayloadQueueSize.Max
+	}
+
+	if config.DistributionsSize.Min == 0 {
+		config.DistributionsSize.Min = distributionsSize.Min
+	}
+
+	if config.DistributionsSize.Max == 0 {
+		config.DistributionsSize.Max = distributionsSize.Max
+	}
+
 	return config
 }
diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go
index 14e4587bf2..42050b200c 100644
--- a/internal/newtelemetry/client_test.go
+++ b/internal/newtelemetry/client_test.go
@@ -886,7 +886,7 @@ func TestClientFlush(t *testing.T) {
 			},
 		},
 		{
-			name: "full-distribution",
+			name: "distribution-overflow",
			when: func(c 
*client) {
				handler := c.Distribution(NamespaceGeneral, "init_time", nil)
				for i := 0; i < 1<<16; i++ {
@@ -911,7 +911,8 @@
 			t.Parallel()
 			config := defaultConfig(test.clientConfig)
 			config.AgentURL = "http://localhost:8126"
-			config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader
+			config.DependencyLoader = test.clientConfig.DependencyLoader             // Don't use the default dependency loader
+			config.InternalMetricsEnabled = test.clientConfig.InternalMetricsEnabled // only enable internal metrics when explicitly set
 			c, err := newClient(tracerConfig, config)
 			require.NoError(t, err)
 			defer c.Close()
@@ -934,15 +936,23 @@
 
 func TestMetricsDisabled(t *testing.T) {
 	t.Setenv("DD_TELEMETRY_METRICS_ENABLED", "false")
 
-	client, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"})
+	c, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"})
 	require.NoError(t, err)
-	defer client.Close()
+	recordWriter := &internal.RecordWriter{}
+	c.(*client).writer = recordWriter
 
-	assert.NotNil(t, client.Gauge(NamespaceTracers, "init_time", nil))
-	assert.NotNil(t, client.Count(NamespaceTracers, "init_time", nil))
-	assert.NotNil(t, client.Rate(NamespaceTracers, "init_time", nil))
-	assert.NotNil(t, client.Distribution(NamespaceGeneral, "init_time", nil))
+	defer c.Close()
+
+	assert.NotNil(t, c.Gauge(NamespaceTracers, "init_time", nil))
+	assert.NotNil(t, c.Count(NamespaceTracers, "init_time", nil))
+	assert.NotNil(t, c.Rate(NamespaceTracers, "init_time", nil))
+	assert.NotNil(t, c.Distribution(NamespaceGeneral, "init_time", nil))
+
+	c.Flush()
+
+	payloads := recordWriter.Payloads()
+	require.Len(t, payloads, 0)
 }
 
 type testRoundTripper struct {
@@ -1149,11 +1159,8 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) {
 	clientConfig := 
ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushIntervalRange: struct { - Min time.Duration - Max time.Duration - }{Min: time.Second, Max: time.Second}, - AgentURL: "http://localhost:8126", + FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, + AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ @@ -1326,11 +1333,9 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushIntervalRange: struct { - Min time.Duration - Max time.Duration - }{Min: time.Second, Max: time.Second}, - AgentURL: "http://localhost:8126", + FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, + DistributionsSize: Range[int]{256, -1}, + AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 1ac85a52ee..63ab031623 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -29,7 +29,7 @@ type distributions struct { // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[string]string) MetricHandle { - if !knownmetrics.IsKnownMetricName(name) { + if !knownmetrics.IsKnownMetric(namespace, "distribution", name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. 
"+ "The metric will still be sent.", name) } @@ -41,11 +41,6 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[s key := distributionKey{namespace: namespace, name: name, tags: strings.TrimSuffix(compiledTags, ",")} - // Max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. - // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. - // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. - // Which will bring our max throughput to 1100 points per second or about 750µs per request. - // TODO: tweak this value once we get telemetry data from the telemetry client handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](1<<8, 1<<14)}) return handle @@ -74,12 +69,12 @@ type distribution struct { values *internal.RingQueue[float64] } -var logLossOnce sync.Once +var distrLogLossOnce sync.Once func (d *distribution) Submit(value float64) { d.newSubmit.Store(true) if !d.values.Enqueue(value) { - logLossOnce.Do(func() { + distrLogLossOnce.Do(func() { log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) }) } @@ -99,7 +94,7 @@ func (d *distribution) payload() transport.DistributionSeries { Metric: d.key.name, Namespace: d.key.namespace, Tags: tags, - Common: knownmetrics.IsCommonMetricName(d.key.name), + Common: knownmetrics.IsCommonMetric(d.key.namespace, "distribution", d.key.name), Points: d.values.Flush(), } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index edad33e9b2..133011a110 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -10,8 +10,8 @@ import ( "sync/atomic" globalinternal 
"gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) var ( @@ -181,6 +181,8 @@ func AddBulkAppConfig(kvs map[string]any, origin Origin) { }) } +var globalClientLogLossOnce sync.Once + // globalClientCall takes a function that takes a Client and calls it with the global client if it exists. // otherwise, it records the action for when the client is started. func globalClientCall(fun func(client Client)) { @@ -190,9 +192,13 @@ func globalClientCall(fun func(client Client)) { client := globalClient.Load() if client == nil || *client == nil { - globalClientRecorder.Record(func(client Client) { + if !globalClientRecorder.Record(func(client Client) { fun(client) - }) + }) { + globalClientLogLossOnce.Do(func() { + log.Debug("telemetry: global client recorder queue is full, dropping telemetry data, please start the telemetry client earlier to avoid data loss") + }) + } return } @@ -207,6 +213,8 @@ func newMetricsHotPointer(maker func(client Client) MetricHandle) *metricsHotPoi return &metricsHandleHotPointers[len(metricsHandleHotPointers)-1] } +var metricLogLossOnce sync.Once + // metricsHotPointer is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. 
 type metricsHotPointer struct {
 	ptr atomic.Pointer[MetricHandle]
@@ -221,18 +229,19 @@ func (t *metricsHotPointer) Submit(value float64) {
 	inner := t.ptr.Load()
 
 	if inner == nil || *inner == nil {
-		t.recorder.Record(func(handle MetricHandle) {
+		if !t.recorder.Record(func(handle MetricHandle) {
 			handle.Submit(value)
-		})
+		}) {
+			metricLogLossOnce.Do(func() {
+				log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss")
+			})
+		}
+		return
 	}
 
 	(*inner).Submit(value)
 }
 
-func (t *metricsHotPointer) payload() transport.MetricData {
-	return transport.MetricData{}
-}
-
 func (t *metricsHotPointer) swap(handle MetricHandle) {
 	if t.ptr.Swap(&handle) == nil {
 		t.recorder.Replay(handle)
diff --git a/internal/newtelemetry/internal/knownmetrics/common_metrics.json b/internal/newtelemetry/internal/knownmetrics/common_metrics.json
index f8d2dbfcea..8f79ae690d 100644
--- a/internal/newtelemetry/internal/knownmetrics/common_metrics.json
+++ b/internal/newtelemetry/internal/knownmetrics/common_metrics.json
@@ -1,177 +1,877 @@
 [
-  "code_coverage.errors",
-  "code_coverage.files",
-  "code_coverage.is_empty",
-  "code_coverage_finished",
-  "code_coverage_started",
-  "context_header.truncated",
-  "context_header_style.extracted",
-  "context_header_style.injected",
-  "docker_lib_injection.failure",
-  "docker_lib_injection.success",
-  "early_flake_detection.request",
-  "early_flake_detection.request_errors",
-  "early_flake_detection.request_ms",
-  "early_flake_detection.response_bytes",
-  "early_flake_detection.response_tests",
-  "endpoint_payload.bytes",
-  "endpoint_payload.dropped",
-  "endpoint_payload.events_count",
-  "endpoint_payload.events_serialization_ms",
-  "endpoint_payload.requests",
-  "endpoint_payload.requests_errors",
-  "endpoint_payload.requests_ms",
-  "evaluators.error",
-  "evaluators.init",
-  "evaluators.rule_sample_rate",
-  "evaluators.run",
-  
"event_created", - "event_finished", - "events_enqueued_for_serialization", - "executed.propagation", - "executed.sink", - "executed.source", - "executed.tainted", - "exporter_fallback", - "flaky_tests.request", - "flaky_tests.request_errors", - "flaky_tests.request_ms", - "flaky_tests.response_bytes", - "flaky_tests.response_tests", - "git.command", - "git.command_errors", - "git.command_ms", - "git_requests.objects_pack", - "git_requests.objects_pack_bytes", - "git_requests.objects_pack_errors", - "git_requests.objects_pack_files", - "git_requests.objects_pack_ms", - "git_requests.search_commits", - "git_requests.search_commits_errors", - "git_requests.search_commits_ms", - "git_requests.settings", - "git_requests.settings_errors", - "git_requests.settings_ms", - "git_requests.settings_response", - "host_lib_injection.failure", - "host_lib_injection.success", - "impacted_tests_detection.request", - "impacted_tests_detection.request_errors", - "impacted_tests_detection.request_ms", - "impacted_tests_detection.response_bytes", - "impacted_tests_detection.response_files", - "init_time", - "inject.error", - "inject.language_detection", - "inject.latency.baseline", - "inject.latency.end_to_end", - "inject.latency.init_container", - "inject.skip", - "inject.success", - "injection.content_security_policy", - "injection.failed", - "injection.initialization.failed", - "injection.initialization.succeed", - "injection.installation", - "injection.installation.duration", - "injection.ms", - "injection.response.bytes", - "injection.skipped", - "injection.succeed", - "instrum.user_auth.missing_user_id", - "instrum.user_auth.missing_user_login", - "instrumented.propagation", - "instrumented.sink", - "instrumented.source", - "integration_errors", - "itr_forced_run", - "itr_skippable_tests.request", - "itr_skippable_tests.request_errors", - "itr_skippable_tests.request_ms", - "itr_skippable_tests.response_bytes", - "itr_skippable_tests.response_suites", - 
"itr_skippable_tests.response_tests", - "itr_skipped", - "itr_unskippable", - "json.tag.size.exceeded", - "k8s_lib_injection.failure", - "k8s_lib_injection.success", - "known_tests.request", - "known_tests.request_errors", - "known_tests.request_ms", - "known_tests.response_bytes", - "known_tests.response_tests", - "library_entrypoint.abort", - "library_entrypoint.abort.integration", - "library_entrypoint.abort.runtime", - "library_entrypoint.complete", - "library_entrypoint.error", - "library_entrypoint.injector.error", - "logs_created", - "manual_api_events", - "otel.env.hiding", - "otel.env.invalid", - "otel.env.unsupported", - "profile_api.bytes", - "profile_api.errors", - "profile_api.ms", - "profile_api.requests", - "profile_api.responses", - "rasp.rule.eval", - "rasp.rule.match", - "rasp.timeout", - "request.tainted", - "server.active_sessions", - "server.memory_usage", - "server.submitted_payloads", - "span.start", - "span_created", - "span_finished", - "span_pointer_calculation", - "span_pointer_calculation.issue", - "spans_created", - "spans_dropped", - "spans_enqueued_for_serialization", - "spans_finished", - "ssi_heuristic.number_of_profiles", - "ssi_heuristic.number_of_runtime_id", - "stats_api.bytes", - "stats_api.errors", - "stats_api.ms", - "stats_api.requests", - "stats_api.responses", - "stats_buckets", - "suppressed.vulnerabilities", - "telemetry_api.bytes", - "telemetry_api.errors", - "telemetry_api.ms", - "telemetry_api.requests", - "telemetry_api.responses", - "test_session", - "trace_api.bytes", - "trace_api.errors", - "trace_api.ms", - "trace_api.requests", - "trace_api.responses", - "trace_chunk_serialization.bytes", - "trace_chunk_serialization.ms", - "trace_chunk_size", - "trace_chunks_dropped", - "trace_chunks_enqueued", - "trace_chunks_enqueued_for_serialization", - "trace_chunks_sent", - "trace_partial_flush.count", - "trace_partial_flush.spans_closed", - "trace_partial_flush.spans_remaining", - "trace_segments_closed", - 
"trace_segments_created", - "tracer_init_time", - "waf.config_errors", - "waf.duration", - "waf.duration_ext", - "waf.init", - "waf.input_truncated", - "waf.requests", - "waf.truncated_value_size", - "waf.updates" + { + "namespace": "appsec", + "type": "count", + "name": "instrum.user_auth.missing_user_id" + }, + { + "namespace": "appsec", + "type": "count", + "name": "instrum.user_auth.missing_user_login" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.rule.eval" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.rule.match" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.timeout" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.config_errors" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.init" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.input_truncated" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.requests" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.updates" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.duration" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.duration_ext" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.truncated_value_size" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.is_empty" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_started" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.dropped" + }, 
+ { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_created" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "events_enqueued_for_serialization" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.search_commits" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.search_commits_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_response" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_forced_run" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + 
"name": "itr_skippable_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_suites" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_tests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skipped" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_unskippable" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "manual_api_events" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "test_session" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "code_coverage.files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.response_tests" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_count" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_serialization_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.requests_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_tests" + }, + { + "namespace": 
"civisibility", + "type": "distribution", + "name": "git.command_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.search_commits_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.settings_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "itr_skippable_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "itr_skippable_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.response_tests" + }, + { + "namespace": "general", + "type": "count", + "name": "logs_created" + }, + { + "namespace": "general", + "type": "distribution", + "name": "init_time" + }, + { + "namespace": "general", + "type": "distribution", + "name": "tracer_init_time" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.source" + }, + { + 
"namespace": "iast", + "type": "count", + "name": "executed.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.source" + }, + { + "namespace": "iast", + "type": "count", + "name": "json.tag.size.exceeded" + }, + { + "namespace": "iast", + "type": "count", + "name": "request.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "suppressed.vulnerabilities" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.error" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.init" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.run" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "span.start" + }, + { + "namespace": "mlobs", + "type": "distribution", + "name": "evaluators.rule_sample_rate" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.errors" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.requests" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.responses" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_profiles" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_runtime_id" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.bytes" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.ms" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.content_security_policy" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.initialization.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": 
"injection.initialization.succeed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.installation" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.skipped" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.succeed" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.installation.duration" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.ms" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.response.bytes" + }, + { + "namespace": "sidecar", + "type": "count", + "name": "server.submitted_payloads" + }, + { + "namespace": "sidecar", + "type": "distribution", + "name": "server.memory_usage" + }, + { + "namespace": "sidecar", + "type": "gauge", + "name": "server.active_sessions" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.errors" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.requests" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.responses" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.bytes" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.ms" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header.truncated" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.extracted" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.injected" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "exporter_fallback" + }, + { + "namespace": "tracers", + "type": "count", + "name": "host_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + 
"name": "host_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.language_detection" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.skip" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "integration_errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.integration" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.runtime" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.complete" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.injector.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.hiding" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.invalid" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.unsupported" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation.issue" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_dropped" + }, + { + "namespace": 
"tracers", + "type": "count", + "name": "spans_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_dropped" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_sent" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_partial_flush.count" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_closed" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_created" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.baseline" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.end_to_end" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.init_container" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.ms" + }, + { + "namespace": "tracers", + "type": 
"distribution", + "name": "trace_chunk_serialization.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_serialization.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_size" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_closed" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_remaining" + }, + { + "namespace": "tracers", + "type": "gauge", + "name": "stats_buckets" + } ] diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index 031cddecdc..71504f31c5 100644 --- a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -20,6 +20,9 @@ import ( "strings" "golang.org/x/exp/slices" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) // This represents the base64-encoded URL of api.github.com to download the configuration file. 
@@ -34,7 +37,7 @@ func base64Decode(encoded string) string { return string(decoded) } -func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []string) error { +func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []knownmetrics.Declaration) error { request, err := http.NewRequest(http.MethodGet, remoteURL, nil) if err != nil { return err @@ -66,7 +69,15 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames } metricNames := getMetricNames(decoded) - slices.SortStableFunc(metricNames, strings.Compare) + slices.SortStableFunc(metricNames, func(i, j knownmetrics.Declaration) int { + if i.Namespace != j.Namespace { + return strings.Compare(string(i.Namespace), string(j.Namespace)) + } + if i.Type != j.Type { + return strings.Compare(i.Type, j.Type) + } + return strings.Compare(i.Name, j.Name) + }) fp, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { @@ -80,8 +91,8 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames return encoder.Encode(metricNames) } -func getCommonMetricNames(input map[string]any) []string { - var names []string +func getCommonMetricNames(input map[string]any) []knownmetrics.Declaration { + var names []knownmetrics.Declaration for category, value := range input { if strings.HasPrefix(category, "$") { continue @@ -89,10 +100,16 @@ func getCommonMetricNames(input map[string]any) []string { metrics := value.(map[string]any) for metricKey, value := range metrics { - names = append(names, metricKey) + metric := knownmetrics.Declaration{ + Namespace: transport.Namespace(category), + Name: metricKey, + Type: value.(map[string]any)["metric_type"].(string), + } + names = append(names, metric) if aliases, ok := value.(map[string]any)["aliases"]; ok { for _, alias := range aliases.([]any) { - names = append(names, alias.(string)) + metric.Name = alias.(string) + names 
= append(names, metric) } } } @@ -100,13 +117,16 @@ func getCommonMetricNames(input map[string]any) []string { return names } -func getGoMetricNames(input map[string]any) []string { - var names []string - for key := range input { +func getGoMetricNames(input map[string]any) []knownmetrics.Declaration { + var names []knownmetrics.Declaration + for key, value := range input { if strings.HasPrefix(key, "$") { continue } - names = append(names, key) + names = append(names, knownmetrics.Declaration{ + Name: key, + Type: value.(map[string]any)["metric_type"].(string), + }) } return names } diff --git a/internal/newtelemetry/internal/knownmetrics/golang_metrics.json b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json index 54396b6e4a..c2c581bf85 100644 --- a/internal/newtelemetry/internal/knownmetrics/golang_metrics.json +++ b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json @@ -1,3 +1,7 @@ [ - "orchestrion.enabled" + { + "namespace": "", + "type": "gauge", + "name": "orchestrion.enabled" + } ] diff --git a/internal/newtelemetry/internal/knownmetrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go index 858c1918ac..14a9991752 100644 --- a/internal/newtelemetry/internal/knownmetrics/known_metrics.go +++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go @@ -13,6 +13,7 @@ import ( "slices" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) //go:embed common_metrics.json @@ -21,33 +22,42 @@ var commonMetricsJSON []byte //go:embed golang_metrics.json var golangMetricsJSON []byte +type Declaration struct { + Namespace transport.Namespace `json:"namespace"` + Type string `json:"type"` + Name string `json:"name"` +} + var ( commonMetrics = parseMetricNames(commonMetricsJSON) golangMetrics = parseMetricNames(golangMetricsJSON) ) -func parseMetricNames(bytes []byte) []string { - var names []string +func parseMetricNames(bytes []byte) 
[]Declaration { + var names []Declaration if err := json.Unmarshal(bytes, &names); err != nil { log.Error("telemetry: failed to parse metric names: %v", err) } return names } -// IsKnownMetricName returns true if the given metric name is a known metric by the backend +// IsKnownMetric returns true if the given metric name is a known metric by the backend // This is linked to generated common_metrics.json file and golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsKnownMetricName(name string) bool { - return slices.Contains(commonMetrics, name) || slices.Contains(golangMetrics, name) +func IsKnownMetric(namespace transport.Namespace, typ, name string) bool { + decl := Declaration{Namespace: namespace, Type: typ, Name: name} + return slices.Contains(commonMetrics, decl) || slices.Contains(golangMetrics, decl) } -// IsCommonMetricName returns true if the given metric name is a known common (cross-language) metric by the backend +// IsCommonMetric returns true if the given metric name is a known common (cross-language) metric by the backend // This is linked to the generated common_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsCommonMetricName(name string) bool { - return slices.Contains(commonMetrics, name) +func IsCommonMetric(namespace transport.Namespace, typ, name string) bool { + decl := Declaration{Namespace: namespace, Type: typ, Name: name} + return slices.Contains(commonMetrics, decl) } -// IsLanguageMetricName returns true if the given metric name is a known Go language metric by the backend +// IsLanguageMetric returns true if the given metric name is a known Go language metric by the backend // This is linked to the generated golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. 
-func IsLanguageMetricName(name string) bool { - return slices.Contains(golangMetrics, name) +func IsLanguageMetric(typ, name string) bool { + decl := Declaration{Type: typ, Name: name} + return slices.Contains(golangMetrics, decl) } diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index edfb843524..f7b1aa8793 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -5,17 +5,13 @@ package internal -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/log" -) - // Recorder is a generic thread-safe type that records functions that could have taken place before object T was created. // Once object T is created, the Recorder can replay all the recorded functions with object T as an argument. type Recorder[T any] struct { queue *RingQueue[func(T)] } -// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions. +// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions before overflowing. func NewRecorder[T any]() Recorder[T] { return Recorder[T]{ // TODO: tweak this value once we get telemetry data from the telemetry client @@ -23,13 +19,13 @@ func NewRecorder[T any]() Recorder[T] { } } -func (r Recorder[T]) Record(f func(T)) { +// Record takes a function and records it in the Recorder's queue. If the queue is full, it returns false. +// Once Replay is called, all recorded functions will be replayed with object T as an argument in order of recording. 
+func (r Recorder[T]) Record(f func(T)) bool { if r.queue == nil { - return - } - if !r.queue.Enqueue(f) { - log.Debug("telemetry: recorder queue is full, dropping record") + return true } + return r.queue.Enqueue(f) } func (r Recorder[T]) Replay(t T) { diff --git a/internal/newtelemetry/internal/transport/namespace.go b/internal/newtelemetry/internal/transport/namespace.go index 9c4dfe1f37..02d94aca19 100644 --- a/internal/newtelemetry/internal/transport/namespace.go +++ b/internal/newtelemetry/internal/transport/namespace.go @@ -8,10 +8,13 @@ package transport type Namespace string const ( - NamespaceGeneral Namespace = "general" - NamespaceTracers Namespace = "tracers" - NamespaceProfilers Namespace = "profilers" - NamespaceAppSec Namespace = "appsec" - NamespaceIAST Namespace = "iast" - NamespaceTelemetry Namespace = "telemetry" + NamespaceGeneral Namespace = "general" + NamespaceTracers Namespace = "tracers" + NamespaceProfilers Namespace = "profilers" + NamespaceAppSec Namespace = "appsec" + NamespaceIAST Namespace = "iast" + NamespaceTelemetry Namespace = "telemetry" + NamespaceCIVisibility Namespace = "civisibility" + NamespaceMLOps Namespace = "mlops" + NamespaceRUM Namespace = "rum" ) diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index e2302c9929..34b64759f0 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -98,6 +98,8 @@ type EndpointRequestResult struct { PayloadByteSize int // CallDuration is the duration of the call to the endpoint if the call was successful CallDuration time.Duration + // StatusCode is the status code of the response from the endpoint even if the call failed but only with an actual HTTP error + StatusCode int } type writer struct { @@ -257,7 +259,7 @@ func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, erro results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ StatusCode: 
response.Status, Body: string(respBodyBytes), - }}) + }, StatusCode: response.StatusCode}) continue } diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index fe2142c0d5..7a160d7032 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -37,7 +37,7 @@ type metrics struct { // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { - if !knownmetrics.IsKnownMetricName(name) { + if !knownmetrics.IsKnownMetric(namespace, string(kind), name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ "The metric will still be sent.", name) } @@ -108,7 +108,7 @@ func (c *metric) payload() transport.MetricData { Namespace: c.key.namespace, Tags: tags, Type: c.key.kind, - Common: knownmetrics.IsCommonMetricName(c.key.name), + Common: knownmetrics.IsCommonMetric(c.key.namespace, string(c.key.kind), c.key.name), Points: [][2]any{ {c.submitTime.Load(), c.value.Load()}, },