diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7292c8fee1..6b8b07351d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,8 +9,7 @@ variables: INDEX_FILE: index.txt KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: dd-trace-go FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true" - BENCHMARK_TARGETS: "BenchmarkStartRequestSpan|BenchmarkHttpServeTrace|BenchmarkTracerAddSpans|BenchmarkStartSpan|BenchmarkSingleSpanRetention|BenchmarkOTelApiWithCustomTags|BenchmarkInjectW3C|BenchmarkExtractW3C|BenchmarkPartialFlushing|BenchmarkGraphQL|BenchmarkSampleWAFContext|BenchmarkCaptureStackTrace|BenchmarkSetTagString|BenchmarkSetTagStringPtr|BenchmarkSetTagMetric|BenchmarkSetTagStringer|BenchmarkSerializeSpanLinksInMeta" - + BENCHMARK_TARGETS: "BenchmarkStartRequestSpan|BenchmarkHttpServeTrace|BenchmarkTracerAddSpans|BenchmarkStartSpan|BenchmarkSingleSpanRetention|BenchmarkOTelApiWithCustomTags|BenchmarkInjectW3C|BenchmarkExtractW3C|BenchmarkPartialFlushing|BenchmarkGraphQL|BenchmarkSampleWAFContext|BenchmarkCaptureStackTrace|BenchmarkSetTagString|BenchmarkSetTagStringPtr|BenchmarkSetTagMetric|BenchmarkSetTagStringer|BenchmarkSerializeSpanLinksInMeta|BenchmarkLogs|BenchmarkWorstCaseScenarioFloodLogging|BenchmarkMetrics|BenchmarkWorstCaseScenarioFloodMetrics" include: - ".gitlab/benchmarks.yml" - ".gitlab/macrobenchmarks.yml" diff --git a/internal/globalconfig/globalconfig.go b/internal/globalconfig/globalconfig.go index a36f50035f..2d6d21e35e 100644 --- a/internal/globalconfig/globalconfig.go +++ b/internal/globalconfig/globalconfig.go @@ -9,6 +9,7 @@ package globalconfig import ( "math" + "os" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal" @@ -130,3 +131,18 @@ func HeaderTagsLen() int { func ClearHeaderTags() { cfg.headersAsTags.Clear() } + +// InstrumentationInstallID returns the install ID as described in DD_INSTRUMENTATION_INSTALL_ID +func InstrumentationInstallID() string { + return os.Getenv("DD_INSTRUMENTATION_INSTALL_ID") +} + +// InstrumentationInstallType returns the 
install type as described in DD_INSTRUMENTATION_INSTALL_TYPE +func InstrumentationInstallType() string { + return os.Getenv("DD_INSTRUMENTATION_INSTALL_TYPE") +} + +// InstrumentationInstallTime returns the install time as described in DD_INSTRUMENTATION_INSTALL_TIME +func InstrumentationInstallTime() string { + return os.Getenv("DD_INSTRUMENTATION_INSTALL_TIME") +} diff --git a/internal/newtelemetry/README.md b/internal/newtelemetry/README.md new file mode 100644 index 0000000000..5e940ddd32 --- /dev/null +++ b/internal/newtelemetry/README.md @@ -0,0 +1,43 @@ +# Instrumentation Telemetry Client + +This documentation details the current architecture of the Instrumentation Telemetry Client of dd-trace-go and what its capabilities are. + +### Data Flow + +```mermaid +flowchart TD + linkStyle default interpolate basis + globalclient@{ shape: circle } -->|client == nil| recorder + globalclient -->|client != nil| client + recorder@{ shape: cyl } --> client@{ shape: circle } + + subgraph datasources + integrations@{ shape: cyl } + configuration@{ shape: cyl } + dependencies@{ shape: cyl } + products@{ shape: cyl } + logs@{ shape: cyl } + metrics@{ shape: cyl } + end + + client --> datasources + + subgraph mapper + direction LR + app-started --> + default[message-batch
heartbeat
extended-heartbeat] --> app-closing + end + + flush@{ shape:rounded } + + queue@{ shape: cyl } --> flush + + datasources -..->|at flush| mapper --> flush + flush -->|if writer fails| queue + + flush --> writer + + writer --> agent@{ shape: das } + writer --> backend@{ shape: stadium } + agent --> backend +``` diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go new file mode 100644 index 0000000000..41cba3e6c7 --- /dev/null +++ b/internal/newtelemetry/api.go @@ -0,0 +1,152 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "io" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// Namespace describes a product to distinguish telemetry coming from +// different products used by the same application +type Namespace = transport.Namespace + +// Goland is having a hard time with the following const block, it keeps deleting the type +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted +const ( + NamespaceGeneral Namespace = transport.NamespaceGeneral + NamespaceTracers Namespace = transport.NamespaceTracers + NamespaceProfilers Namespace = transport.NamespaceProfilers + NamespaceAppSec Namespace = transport.NamespaceAppSec + NamespaceIAST Namespace = transport.NamespaceIAST + NamespaceCIVisibility Namespace = transport.NamespaceCIVisibility + NamespaceMLOps Namespace = transport.NamespaceMLOps + NamespaceRUM Namespace = transport.NamespaceRUM +) + +// Origin describes the source of a configuration change +type Origin = transport.Origin + +// Goland is having a hard time with the following const block, it keeps deleting the type +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted +const ( + OriginDefault Origin = transport.OriginDefault + OriginCode Origin = transport.OriginCode + 
OriginDDConfig Origin = transport.OriginDDConfig + OriginEnvVar Origin = transport.OriginEnvVar + OriginRemoteConfig Origin = transport.OriginRemoteConfig +) + +// LogLevel describes the level of a log message +type LogLevel = transport.LogLevel + +// Goland is having a hard time with the following const block, it keeps deleting the type +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted +const ( + LogDebug LogLevel = transport.LogLevelDebug + LogWarn LogLevel = transport.LogLevelWarn + LogError LogLevel = transport.LogLevelError +) + +// MetricHandle can be used to submit different values for the same metric. +// MetricHandle is used to reduce lock contention when submitting metrics. +// This can also be used ephemerally to submit a single metric value like this: +// +// ```go +// client.Count(NamespaceAppSec, "my-count", []string{"tag1:true", "tag2:1.0"}).Submit(1.0) +// ``` +type MetricHandle interface { + // Submit submits a value to the metric handle. + Submit(value float64) + // Get returns the last value submitted to the metric handle. + Get() float64 +} + +// Integration is an integration that is configured to be traced. +type Integration struct { + // Name is an arbitrary string that must stay constant for the integration. + Name string + // Version is the version of the integration/dependency that is being loaded. + Version string + // Error is the error that occurred while loading the integration. If this field is specified, the integration is + // considered to have been forcefully disabled because of the error. + Error string +} + +// Configuration is a key-value pair that is used to configure the application. +type Configuration struct { + // Name is the key of the configuration. + Name string + // Value is the value of the configuration. Needs to be JSON serializable. + Value any + // Origin is the source of the configuration change. 
+ Origin Origin +} + +// LogOption is a function that modifies the log message that is sent to the telemetry. +type LogOption func(key *loggerKey, value *loggerValue) + +// Client constitutes all the functions available concurrently for the telemetry users. All methods are thread-safe +// This is an interface for easier testing but all functions will be mirrored at the package level to call +// the global client. +type Client interface { + io.Closer + + // Count obtains the metric handle for the given parameters, or creates a new one if none was created just yet. + // Tags cannot contain commas. + Count(namespace Namespace, name string, tags []string) MetricHandle + + // Rate obtains the metric handle for the given parameters, or creates a new one if none was created just yet. + // Tags cannot contain commas. + Rate(namespace Namespace, name string, tags []string) MetricHandle + + // Gauge obtains the metric handle for the given parameters, or creates a new one if none was created just yet. + // Tags cannot contain commas. + Gauge(namespace Namespace, name string, tags []string) MetricHandle + + // Distribution obtains the metric handle for the given parameters, or creates a new one if none was created just yet. + // Tags cannot contain commas. + Distribution(namespace Namespace, name string, tags []string) MetricHandle + + // Log sends a telemetry log at the desired level with the given text and options. + // Options include sending key-value pairs as tags, and a stack trace frozen from inside the Log function. 
+ Log(level LogLevel, text string, options ...LogOption) + + // ProductStarted declares a product to have started at the customer’s request + ProductStarted(product Namespace) + + // ProductStopped declares a product to have been stopped by the customer + ProductStopped(product Namespace) + + // ProductStartError declares that a product could not start because of the following error + ProductStartError(product Namespace, err error) + + // RegisterAppConfig adds a key value pair to the app configuration and sends the change to telemetry + // value has to be json serializable and the origin is the source of the change. + RegisterAppConfig(key string, value any, origin Origin) + + // RegisterAppConfigs adds a list of key value pairs to the app configuration and sends the change to telemetry. + // Same as RegisterAppConfig but for multiple values. + RegisterAppConfigs(kvs ...Configuration) + + // MarkIntegrationAsLoaded marks an integration as loaded in the telemetry + MarkIntegrationAsLoaded(integration Integration) + + // Flush sends any remaining data without closing the client. + Flush() + + // AppStart sends the telemetry necessary to signal that the app is starting. + // Preferred use via StartApp package level function + AppStart() + + // AppStop sends the telemetry necessary to signal that the app is stopping. + // Preferred use via StopApp package level function + AppStop() +} diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go new file mode 100644 index 0000000000..a37d25629d --- /dev/null +++ b/internal/newtelemetry/client.go @@ -0,0 +1,344 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. 
+ +package newtelemetry + +import ( + "errors" + "os" + "strconv" + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/mapper" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// NewClient creates a new telemetry client with the given service, environment, and version and config. +func NewClient(service, env, version string, config ClientConfig) (Client, error) { + if service == "" { + return nil, errors.New("service name must not be empty") + } + + config = defaultConfig(config) + if err := config.validateConfig(); err != nil { + return nil, err + } + + return newClient(internal.TracerConfig{Service: service, Env: env, Version: version}, config) +} + +func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client, error) { + writerConfig, err := newWriterConfig(config, tracerConfig) + if err != nil { + return nil, err + } + + writer, err := internal.NewWriter(writerConfig) + if err != nil { + return nil, err + } + + client := &client{ + tracerConfig: tracerConfig, + writer: writer, + clientConfig: config, + flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), + payloadQueue: internal.NewRingQueue[transport.Payload](config.PayloadQueueSize), + + dependencies: dependencies{ + DependencyLoader: config.DependencyLoader, + }, + metrics: metrics{ + skipAllowlist: config.Debug, + pool: internal.NewSyncPool(func() *metricPoint { return &metricPoint{} }), + }, + distributions: distributions{ + skipAllowlist: config.Debug, + queueSize: config.DistributionsSize, + pool: internal.NewSyncPool(func() []float64 { return make([]float64, config.DistributionsSize.Min) }), + }, + } + + client.dataSources = append(client.dataSources, + &client.integrations, + &client.products, + &client.configuration, + &client.dependencies, + ) + + if 
config.LogsEnabled { + client.dataSources = append(client.dataSources, &client.logger) + } + + if config.MetricsEnabled { + client.dataSources = append(client.dataSources, &client.metrics, &client.distributions) + } + + client.flushTicker = internal.NewTicker(client.Flush, config.FlushInterval) + + return client, nil +} + +// dataSource is where the data that will be flushed comes from, i.e. metrics, logs, configurations, etc. +type dataSource interface { + Payload() transport.Payload +} + +type client struct { + tracerConfig internal.TracerConfig + clientConfig ClientConfig + + // Data sources + dataSources []dataSource + integrations integrations + products products + configuration configuration + dependencies dependencies + logger logger + metrics metrics + distributions distributions + + // flushMapper is the transformer to use for the next flush on the gathered bodies on this tick + flushMapper mapper.Mapper + flushMapperMu sync.Mutex + + // flushTicker is the ticker that triggers a call to client.Flush every flush interval + flushTicker *internal.Ticker + + // writer is the writer to use to send the payloads to the backend or the agent + writer internal.Writer + + // payloadQueue holds previously built payloads that could not be flushed yet. + payloadQueue *internal.RingQueue[transport.Payload] +} + +func (c *client) Log(level LogLevel, text string, options ...LogOption) { + if !c.clientConfig.LogsEnabled { + return + } + + c.logger.Add(level, text, options...) 
+} + +func (c *client) MarkIntegrationAsLoaded(integration Integration) { + c.integrations.Add(integration) +} + +func (c *client) Count(namespace Namespace, name string, tags []string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } + return c.metrics.LoadOrStore(namespace, transport.CountMetric, name, tags) +} + +func (c *client) Rate(namespace Namespace, name string, tags []string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } + return c.metrics.LoadOrStore(namespace, transport.RateMetric, name, tags) +} + +func (c *client) Gauge(namespace Namespace, name string, tags []string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } + return c.metrics.LoadOrStore(namespace, transport.GaugeMetric, name, tags) +} + +func (c *client) Distribution(namespace Namespace, name string, tags []string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } + return c.distributions.LoadOrStore(namespace, name, tags) +} + +func (c *client) ProductStarted(product Namespace) { + c.products.Add(product, true, nil) +} + +func (c *client) ProductStopped(product Namespace) { + c.products.Add(product, false, nil) +} + +func (c *client) ProductStartError(product Namespace, err error) { + c.products.Add(product, false, err) +} + +func (c *client) RegisterAppConfig(key string, value any, origin Origin) { + c.configuration.Add(Configuration{key, value, origin}) +} + +func (c *client) RegisterAppConfigs(kvs ...Configuration) { + for _, value := range kvs { + c.configuration.Add(value) + } +} + +func (c *client) Config() ClientConfig { + return c.clientConfig +} + +// Flush sends all the data sources before calling flush +// This function is called by the flushTicker so it should not panic, or it will crash the whole customer application. +// If a panic occurs, we stop the telemetry and log the error. 
+func (c *client) Flush() { + defer func() { + r := recover() + if r == nil { + return + } + log.Warn("panic while flushing telemetry data, stopping telemetry: %v", r) + telemetryClientDisabled = true + if gc, ok := GlobalClient().(*client); ok && gc == c { + SwapClient(nil) + } + }() + + payloads := make([]transport.Payload, 0, 8) + for _, ds := range c.dataSources { + if payload := ds.Payload(); payload != nil { + payloads = append(payloads, payload) + } + } + + nbBytes, err := c.flush(payloads) + if err != nil { + log.Warn("telemetry: error while flushing: %v", err) + } + + if c.clientConfig.Debug { + log.Debug("telemetry: flushed %d bytes of data", nbBytes) + } +} + +func (c *client) transform(payloads []transport.Payload) []transport.Payload { + c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() + payloads, c.flushMapper = c.flushMapper.Transform(payloads) + return payloads +} + +// flush sends all the data sources to the writer after having sent them through the [transform] function. +// It returns the amount of bytes sent to the writer. +func (c *client) flush(payloads []transport.Payload) (int, error) { + payloads = c.transform(payloads) + + if c.payloadQueue.IsEmpty() && len(payloads) == 0 { + return 0, nil + } + + emptyQueue := c.payloadQueue.IsEmpty() + // We enqueue the new payloads to preserve the order of the payloads + c.payloadQueue.Enqueue(payloads...) + payloads = c.payloadQueue.Flush() + + var ( + nbBytes int + speedIncreased bool + failedCalls []internal.EndpointRequestResult + ) + + for i, payload := range payloads { + results, err := c.writer.Flush(payload) + c.computeFlushMetrics(results, err) + if err != nil { + // We stop flushing when we encounter a fatal error, put the bodies in the queue and return the error + if results[len(results)-1].StatusCode == 413 { // If the payload is too large we have no way to divide it, we can only skip it... 
+ log.Warn("telemetry: tried sending a payload that was too large, dropping it") + continue + } + c.payloadQueue.Enqueue(payloads[i:]...) + return nbBytes, err + } + + failedCalls = append(failedCalls, results[:len(results)-1]...) + successfulCall := results[len(results)-1] + + if !speedIncreased && successfulCall.PayloadByteSize > c.clientConfig.EarlyFlushPayloadSize { + // We increase the speed of the flushTicker to try to flush the remaining bodies faster as we are at risk of sending too large bodies to the backend + c.flushTicker.CanIncreaseSpeed() + speedIncreased = true + } + + nbBytes += successfulCall.PayloadByteSize + } + + if emptyQueue && !speedIncreased { // If we did not send a very big payload, and we have no payloads + c.flushTicker.CanDecreaseSpeed() + } + + if len(failedCalls) > 0 { + var errs []error + for _, call := range failedCalls { + errs = append(errs, call.Error) + } + log.Debug("non-fatal error(s) while flushing telemetry data: %v", errors.Join(errs...)) + } + + return nbBytes, nil +} + +// computeFlushMetrics computes and submits the metrics for the flush operation using the output from the writer.Flush method. +// It will submit the number of requests, responses, errors, the number of bytes sent and the duration of the call that was successful. 
+func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, reason error) { + if !c.clientConfig.internalMetricsEnabled { + return + } + + indexToEndpoint := func(i int) string { + if i == 0 && c.clientConfig.AgentURL != "" { + return "agent" + } + return "agentless" + } + + for i, result := range results { + endpoint := "endpoint:" + indexToEndpoint(i) + c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", []string{endpoint}).Submit(1) + if result.StatusCode != 0 { + c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", []string{endpoint, "status_code:" + strconv.Itoa(result.StatusCode)}).Submit(1) + } + + if result.Error != nil { + typ := "type:network" + if os.IsTimeout(result.Error) { + typ = "type:timeout" + } + var writerStatusCodeError *internal.WriterStatusCodeError + if errors.As(result.Error, &writerStatusCodeError) { + typ = "type:status_code" + } + c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", []string{endpoint, typ}).Submit(1) + } + } + + if reason != nil { + return + } + + successfulCall := results[len(results)-1] + endpoint := "endpoint:" + indexToEndpoint(len(results)-1) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", []string{endpoint}).Submit(float64(successfulCall.PayloadByteSize)) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", []string{endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) +} + +func (c *client) AppStart() { + c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() + c.flushMapper = mapper.NewAppStartedMapper(c.flushMapper) +} + +func (c *client) AppStop() { + c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() + c.flushMapper = mapper.NewAppClosingMapper(c.flushMapper) +} + +func (c *client) Close() error { + c.flushTicker.Stop() + return nil +} diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go new file mode 100644 index 0000000000..d0688e4434 --- /dev/null +++ 
b/internal/newtelemetry/client_config.go @@ -0,0 +1,275 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "fmt" + "net/http" + "net/url" + "os" + "runtime/debug" + "time" + + globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" +) + +type ClientConfig struct { + // DependencyLoader determines how dependency data is sent via telemetry. + // The default value is [debug.ReadBuildInfo] since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product + // To disable this feature, please implement a function that returns nil, false. + // This can only be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED + DependencyLoader func() (*debug.BuildInfo, bool) + + // MetricsEnabled determines whether metrics are sent via telemetry. + // If false, libraries should not send the generate-metrics or distributions events. + // This can only be controlled via the env var DD_TELEMETRY_METRICS_ENABLED + MetricsEnabled bool + + // LogsEnabled determines whether logs are sent via telemetry. + // This can only be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED + LogsEnabled bool + + // AgentlessURL is the full URL to the agentless telemetry endpoint. (optional) + // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry + AgentlessURL string + + // AgentURL is the url of the agent to send telemetry to. (optional) + // If the AgentURL is not set, the telemetry client will not attempt to connect to the agent before sending to the agentless endpoint. 
+ AgentURL string + + // HTTPClient is the http client to use for sending telemetry, defaults to a http.DefaultClient copy. + HTTPClient *http.Client + + // HeartbeatInterval is the interval at which to send a heartbeat payload, defaults to 60s. + // The maximum value is 60s. + HeartbeatInterval time.Duration + + // ExtendedHeartbeatInterval is the interval at which to send an extended heartbeat payload, defaults to 24h. + ExtendedHeartbeatInterval time.Duration + + // FlushInterval is the interval at which the client flushes the data. + // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s + // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. Values will be clamped otherwise. + FlushInterval internal.Range[time.Duration] + + // PayloadQueueSize is the size of the payload queue. Default range is [4, 32]. + PayloadQueueSize internal.Range[int] + + // DistributionsSize is the size of the distribution queue. Default range is [2^8, 2^14]. + DistributionsSize internal.Range[int] + + // Debug enables debug mode for the telemetry client and sent it to the backend so it logs the request + Debug bool + + // APIKey is the API key to use for sending telemetry to the agentless endpoint. (using DD_API_KEY env var by default) + APIKey string + + // EarlyFlushPayloadSize is the size of the payload that will trigger an early flush. + // This is necessary because backend won't allow bodies larger than 5MB. + // The default value here will be 2MB to take into account the large inaccuracy in estimating the size of bodies + EarlyFlushPayloadSize int + + // internalMetricsEnabled determines whether client stats metrics are sent via telemetry. Default to true. + internalMetricsEnabled bool +} + +var ( + // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is + // also the default URL in case connecting to the agent URL fails. 
+ agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" + + // defaultHeartbeatInterval is the default interval at which the client sends a heartbeat. + defaultHeartbeatInterval = time.Minute + + // defaultExtendedHeartbeatInterval is the default interval at which the client sends an extended heartbeat. + defaultExtendedHeartbeatInterval = 24 * time.Hour + + // defaultFlushIntervalRange is the default range of intervals at which the client flushes the data. + defaultFlushIntervalRange = internal.Range[time.Duration]{ + Min: 15 * time.Second, + Max: 60 * time.Second, + } + + defaultAuthorizedHearbeatRange = internal.Range[time.Duration]{ + Min: time.Microsecond, + Max: time.Minute, + } + + agentProxyAPIPath = "/telemetry/proxy/api/v2/apmtelemetry" + + defaultEarlyFlushPayloadSize = 2 * 1024 * 1024 // 2MB + + // authorizedPayloadSize.Max is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. + authorizedPayloadSize = internal.Range[int]{ + Min: 0, + Max: 5 * 1024 * 1024, // 5MB + } + + // TODO: tweak this value once we get real telemetry data from the telemetry client + // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. + // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + // Ideally both values should be powers of 2 because of the way the ring queue is implemented as it's growing + defaultPayloadQueueSize = internal.Range[int]{ + Min: 4, + Max: 32, + } + + // TODO: tweak this value once we get telemetry data from the telemetry client + // Default max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB array _at worst_. 
+ // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. + // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. + // Which will bring our max throughput to 1100 points per second or about 750µs per request. + distributionsSize = internal.Range[int]{ + Min: 1 << 8, + Max: 1 << 14, + } +) + +func (config ClientConfig) validateConfig() error { + if config.HeartbeatInterval > time.Minute { + return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval) + } + + if config.FlushInterval.Min > time.Minute || config.FlushInterval.Max > time.Minute { + return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) + } + + if !config.FlushInterval.IsOrdered() { + return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) + } + + if !authorizedPayloadSize.Contains(config.EarlyFlushPayloadSize) { + return fmt.Errorf("EarlyFlushPayloadSize must be between 0 and 5MB, got %v", config.EarlyFlushPayloadSize) + } + + return nil +} + +// defaultConfig returns a ClientConfig with default values set. 
+func defaultConfig(config ClientConfig) ClientConfig { + if config.AgentlessURL == "" { + config.AgentlessURL = agentlessURL + } + + if config.APIKey == "" { + config.APIKey = os.Getenv("DD_API_KEY") + if config.APIKey == "" { + config.APIKey = os.Getenv("DD-API-KEY") + } + } + + if config.FlushInterval.Min == 0 { + config.FlushInterval.Min = defaultFlushIntervalRange.Min + } else { + config.FlushInterval.Min = defaultAuthorizedHearbeatRange.Clamp(config.FlushInterval.Min) + } + + if config.FlushInterval.Max == 0 { + config.FlushInterval.Max = defaultFlushIntervalRange.Max + } else { + config.FlushInterval.Max = defaultAuthorizedHearbeatRange.Clamp(config.FlushInterval.Max) + } + + heartBeatInterval := defaultHeartbeatInterval + if config.HeartbeatInterval != 0 { + heartBeatInterval = config.HeartbeatInterval + } + + envVal := globalinternal.FloatEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", heartBeatInterval.Seconds()) + config.HeartbeatInterval = defaultAuthorizedHearbeatRange.Clamp(time.Duration(envVal * float64(time.Second))) + if config.HeartbeatInterval != defaultHeartbeatInterval { + log.Debug("telemetry: using custom heartbeat interval %v", config.HeartbeatInterval) + } + // Make sure we flush at least at each heartbeat interval + config.FlushInterval = config.FlushInterval.ReduceMax(config.HeartbeatInterval) + + if config.HeartbeatInterval == config.FlushInterval.Max { // Since the go ticker is not exact when it comes to the interval, we need to make sure the heartbeat is actually sent + config.HeartbeatInterval = config.HeartbeatInterval - 10*time.Millisecond + } + + if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { + config.DependencyLoader = debug.ReadBuildInfo + } + + if !config.MetricsEnabled { + config.MetricsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_METRICS_ENABLED", true) + } + + if !config.LogsEnabled { + config.LogsEnabled = 
globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true) + } + + if !config.internalMetricsEnabled { + config.internalMetricsEnabled = true + } + + if config.EarlyFlushPayloadSize == 0 { + config.EarlyFlushPayloadSize = defaultEarlyFlushPayloadSize + } + + if config.ExtendedHeartbeatInterval == 0 { + config.ExtendedHeartbeatInterval = defaultExtendedHeartbeatInterval + } + + if config.PayloadQueueSize.Min == 0 { + config.PayloadQueueSize.Min = defaultPayloadQueueSize.Min + } + + if config.PayloadQueueSize.Max == 0 { + config.PayloadQueueSize.Max = defaultPayloadQueueSize.Max + } + + if config.DistributionsSize.Min == 0 { + config.DistributionsSize.Min = distributionsSize.Min + } + + if config.DistributionsSize.Max == 0 { + config.DistributionsSize.Max = distributionsSize.Max + } + + return config +} + +func newWriterConfig(config ClientConfig, tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { + endpoints := make([]*http.Request, 0, 2) + if config.AgentURL != "" { + baseURL, err := url.Parse(config.AgentURL) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("invalid agent URL: %v", err) + } + + baseURL.Path = agentProxyAPIPath + request, err := http.NewRequest(http.MethodPost, baseURL.String(), nil) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("failed to create request: %v", err) + } + + endpoints = append(endpoints, request) + } + + if config.AgentlessURL != "" && config.APIKey != "" { + request, err := http.NewRequest(http.MethodPost, config.AgentlessURL, nil) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("failed to create request: %v", err) + } + + request.Header.Set("DD-API-KEY", config.APIKey) + endpoints = append(endpoints, request) + } + + if len(endpoints) == 0 { + return internal.WriterConfig{}, fmt.Errorf("telemetry: could not build any endpoint, please provide an AgentURL or an APIKey with an optional AgentlessURL") + } + + return internal.WriterConfig{ + TracerConfig: 
tracerConfig, + Endpoints: endpoints, + HTTPClient: config.HTTPClient, + Debug: config.Debug, + }, nil +} diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go new file mode 100644 index 0000000000..66e2a508d5 --- /dev/null +++ b/internal/newtelemetry/client_test.go @@ -0,0 +1,1592 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "runtime" + "runtime/debug" + "slices" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + "gopkg.in/DataDog/dd-trace-go.v1/internal/version" +) + +func TestNewClient(t *testing.T) { + for _, test := range []struct { + name string + tracerConfig internal.TracerConfig + clientConfig ClientConfig + newErr string + }{ + { + name: "nominal", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + clientConfig: ClientConfig{ + AgentURL: "http://localhost:8126", + }, + }, + { + name: "empty service", + tracerConfig: internal.TracerConfig{}, + newErr: "service name must not be empty", + }, + { + name: "empty agent url", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + }, + clientConfig: ClientConfig{}, + newErr: "could not build any endpoint", + }, + { + name: "invalid agent url", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + }, + clientConfig: ClientConfig{ + AgentURL: 
"toto_protocol://localhost:8126", + }, + newErr: "invalid agent URL", + }, + { + name: "Too big payload size", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + }, + clientConfig: ClientConfig{ + AgentURL: "http://localhost:8126", + EarlyFlushPayloadSize: 64 * 1024 * 1024, // 64MB + }, + newErr: "EarlyFlushPayloadSize must be between 0 and 5MB", + }, + } { + t.Run(test.name, func(t *testing.T) { + c, err := NewClient(test.tracerConfig.Service, test.tracerConfig.Env, test.tracerConfig.Version, test.clientConfig) + if err == nil { + defer c.Close() + } + + if test.newErr == "" { + require.NoError(t, err) + } else { + require.ErrorContains(t, err, test.newErr) + } + }) + } +} + +func TestClientFlush(t *testing.T) { + tracerConfig := internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + } + + type testParams struct { + name string + clientConfig ClientConfig + when func(c *client) + expect func(*testing.T, []transport.Payload) + } + + testcases := []testParams{ + { + name: "heartbeat", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppHeartbeat{}, payload) + assert.Equal(t, payload.RequestType(), transport.RequestTypeAppHeartbeat) + }, + }, + { + name: "extended-heartbeat-config", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + ExtendedHeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.RegisterAppConfig("key", "value", OriginDefault) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch[1].RequestType) + + assert.Len(t, 
batch[1].Payload.(transport.AppExtendedHeartbeat).Configuration, 0) + }, + }, + { + name: "extended-heartbeat-integrations", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + ExtendedHeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch[1].RequestType) + assert.Len(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations, 1) + assert.Equal(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Name, "test-integration") + assert.Equal(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Version, "1.0.0") + }, + }, + { + name: "configuration-default", + when: func(c *client) { + c.RegisterAppConfig("key", "value", OriginDefault) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + assert.Len(t, config.Configuration, 1) + assert.Equal(t, config.Configuration[0].Name, "key") + assert.Equal(t, config.Configuration[0].Value, "value") + assert.Equal(t, config.Configuration[0].Origin, OriginDefault) + }, + }, + { + name: "configuration-default", + when: func(c *client) { + c.RegisterAppConfig("key", "value", OriginDefault) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + assert.Len(t, config.Configuration, 1) + 
assert.Equal(t, config.Configuration[0].Name, "key") + assert.Equal(t, config.Configuration[0].Value, "value") + assert.Equal(t, config.Configuration[0].Origin, OriginDefault) + }, + }, + { + name: "configuration-complex-values", + when: func(c *client) { + c.RegisterAppConfigs( + Configuration{Name: "key1", Value: []string{"value1", "value2"}, Origin: OriginDefault}, + Configuration{Name: "key2", Value: map[string]string{"key": "value", "key2": "value2"}, Origin: OriginCode}, + Configuration{Name: "key3", Value: []int{1, 2, 3}, Origin: OriginDDConfig}, + Configuration{Name: "key4", Value: struct { + A string + }{A: "1"}, Origin: OriginEnvVar}, + Configuration{Name: "key5", Value: map[int]struct{ X int }{1: {X: 1}}, Origin: OriginRemoteConfig}, + ) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + + slices.SortStableFunc(config.Configuration, func(a, b transport.ConfKeyValue) int { + return strings.Compare(a.Name, b.Name) + }) + + assert.Len(t, config.Configuration, 5) + assert.Equal(t, "key1", config.Configuration[0].Name) + assert.Equal(t, "value1,value2", config.Configuration[0].Value) + assert.Equal(t, OriginDefault, config.Configuration[0].Origin) + assert.Equal(t, "key2", config.Configuration[1].Name) + assert.Equal(t, "key:value,key2:value2", config.Configuration[1].Value) + assert.Equal(t, OriginCode, config.Configuration[1].Origin) + assert.Equal(t, "key3", config.Configuration[2].Name) + assert.Equal(t, "[1 2 3]", config.Configuration[2].Value) + assert.Equal(t, OriginDDConfig, config.Configuration[2].Origin) + assert.Equal(t, "key4", config.Configuration[3].Name) + assert.Equal(t, "{1}", config.Configuration[3].Value) + assert.Equal(t, OriginEnvVar, config.Configuration[3].Origin) + assert.Equal(t, "key5", config.Configuration[4].Name) + assert.Equal(t, "1:{1}", 
config.Configuration[4].Value) + assert.Equal(t, OriginRemoteConfig, config.Configuration[4].Origin) + }, + }, + { + name: "product-start", + when: func(c *client) { + c.ProductStarted("test-product") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.True(t, productChange.Products[Namespace("test-product")].Enabled) + }, + }, + { + name: "product-start-error", + when: func(c *client) { + c.ProductStartError("test-product", errors.New("test-error")) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.False(t, productChange.Products[Namespace("test-product")].Enabled) + assert.Equal(t, "test-error", productChange.Products[Namespace("test-product")].Error.Message) + }, + }, + { + name: "product-stop", + when: func(c *client) { + c.ProductStopped("test-product") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.False(t, productChange.Products[Namespace("test-product")].Enabled) + }, + }, + { + name: "integration", + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppIntegrationChange{}, payload) + integrationChange := payload.(transport.AppIntegrationChange) + assert.Len(t, integrationChange.Integrations, 1) + assert.Equal(t, integrationChange.Integrations[0].Name, 
"test-integration") + assert.Equal(t, integrationChange.Integrations[0].Version, "1.0.0") + assert.True(t, integrationChange.Integrations[0].Enabled) + }, + }, + { + name: "integration-error", + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0", Error: "test-error"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppIntegrationChange{}, payload) + integrationChange := payload.(transport.AppIntegrationChange) + assert.Len(t, integrationChange.Integrations, 1) + assert.Equal(t, integrationChange.Integrations[0].Name, "test-integration") + assert.Equal(t, integrationChange.Integrations[0].Version, "1.0.0") + assert.False(t, integrationChange.Integrations[0].Enabled) + assert.Equal(t, integrationChange.Integrations[0].Error, "test-error") + }, + }, + { + name: "product+integration", + when: func(c *client) { + c.ProductStarted("test-product") + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + assert.Len(t, batch, 2) + for _, payload := range batch { + switch p := payload.Payload.(type) { + case transport.AppProductChange: + assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) + assert.Len(t, p.Products, 1) + assert.True(t, p.Products[Namespace("test-product")].Enabled) + case transport.AppIntegrationChange: + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + assert.True(t, p.Integrations[0].Enabled) + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, + { + name: "product+integration+heartbeat", 
+ clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.ProductStarted("test-product") + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + assert.Len(t, batch, 3) + for _, payload := range batch { + switch p := payload.Payload.(type) { + case transport.AppProductChange: + assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) + assert.Len(t, p.Products, 1) + assert.True(t, p.Products[Namespace("test-product")].Enabled) + case transport.AppIntegrationChange: + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + assert.True(t, p.Integrations[0].Enabled) + case transport.AppHeartbeat: + assert.Equal(t, transport.RequestTypeAppHeartbeat, payload.RequestType) + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, + { + name: "app-started", + when: func(c *client) { + c.AppStart() + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, appStart.InstallSignature.InstallID, globalconfig.InstrumentationInstallID()) + assert.Equal(t, appStart.InstallSignature.InstallType, globalconfig.InstrumentationInstallType()) + assert.Equal(t, appStart.InstallSignature.InstallTime, globalconfig.InstrumentationInstallTime()) + }, + }, + { + name: "app-started-with-product", + when: func(c *client) { + c.AppStart() + c.ProductStarted("test-product") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + 
require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, appStart.Products[Namespace("test-product")].Enabled, true) + }, + }, + { + name: "app-started-with-configuration", + when: func(c *client) { + c.AppStart() + c.RegisterAppConfig("key", "value", OriginDefault) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + require.Len(t, appStart.Configuration, 1) + assert.Equal(t, appStart.Configuration[0].Name, "key") + assert.Equal(t, appStart.Configuration[0].Value, "value") + }, + }, + { + name: "app-started+integrations", + when: func(c *client) { + c.AppStart() + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, globalconfig.InstrumentationInstallID(), appStart.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), appStart.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), appStart.InstallSignature.InstallTime) + + payload = payloads[1] + require.IsType(t, transport.AppIntegrationChange{}, payload) + p := payload.(transport.AppIntegrationChange) + + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + }, + }, + { + name: "app-started+heartbeat", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.AppStart() + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, 
globalconfig.InstrumentationInstallID(), appStart.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), appStart.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), appStart.InstallSignature.InstallTime) + + payload = payloads[1] + require.IsType(t, transport.AppHeartbeat{}, payload) + }, + }, + { + name: "app-stopped", + when: func(c *client) { + c.AppStop() + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppClosing{}, payload) + }, + }, + { + name: "app-dependencies-loaded", + clientConfig: ClientConfig{ + DependencyLoader: func() (*debug.BuildInfo, bool) { + return &debug.BuildInfo{ + Deps: []*debug.Module{ + {Path: "test", Version: "v1.0.0"}, + {Path: "test2", Version: "v2.0.0"}, + {Path: "test3", Version: "3.0.0"}, + }, + }, true + }, + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppDependenciesLoaded{}, payload) + deps := payload.(transport.AppDependenciesLoaded) + + assert.Len(t, deps.Dependencies, 3) + assert.Equal(t, deps.Dependencies[0].Name, "test") + assert.Equal(t, deps.Dependencies[0].Version, "1.0.0") + assert.Equal(t, deps.Dependencies[1].Name, "test2") + assert.Equal(t, deps.Dependencies[1].Version, "2.0.0") + assert.Equal(t, deps.Dependencies[2].Name, "test3") + assert.Equal(t, deps.Dependencies[2].Version, "3.0.0") + }, + }, + { + name: "app-many-dependencies-loaded", + clientConfig: ClientConfig{ + DependencyLoader: func() (*debug.BuildInfo, bool) { + modules := make([]*debug.Module, 2001) + for i := range modules { + modules[i] = &debug.Module{ + Path: fmt.Sprintf("test-%d", i), + Version: fmt.Sprintf("v%d.0.0", i), + } + } + return &debug.BuildInfo{ + Deps: modules, + }, true + }, + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppDependenciesLoaded{}, 
payload) + deps := payload.(transport.AppDependenciesLoaded) + + if len(deps.Dependencies) != 2000 && len(deps.Dependencies) != 1 { + t.Fatalf("expected 2000 and 1 dependencies, got %d", len(deps.Dependencies)) + } + + if len(deps.Dependencies) == 1 { + assert.Equal(t, deps.Dependencies[0].Name, "test-0") + assert.Equal(t, deps.Dependencies[0].Version, "0.0.0") + return + } + + for i := range deps.Dependencies { + assert.Equal(t, deps.Dependencies[i].Name, fmt.Sprintf("test-%d", i)) + assert.Equal(t, deps.Dependencies[i].Version, fmt.Sprintf("%d.0.0", i)) + } + }, + }, + { + name: "single-log-debug", + when: func(c *client) { + c.Log(LogDebug, "test") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelDebug, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + }, + }, + { + name: "single-log-warn", + when: func(c *client) { + c.Log(LogWarn, "test") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelWarn, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + }, + }, + { + name: "single-log-error", + when: func(c *client) { + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + }, + }, + { + name: "multiple-logs-same-key", + when: func(c *client) { + c.Log(LogError, "test") + c.Log(LogError, "test") + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payloads 
[]transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, uint32(3), logs.Logs[0].Count) + }, + }, + { + name: "single-log-with-tag", + when: func(c *client) { + c.Log(LogError, "test", WithTags([]string{"key:value"})) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, "key:value", logs.Logs[0].Tags) + }, + }, + { + name: "single-log-with-tags", + when: func(c *client) { + c.Log(LogError, "test", WithTags([]string{"key:value", "key2:value2"})) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + tags := strings.Split(logs.Logs[0].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + }, + }, + { + name: "single-log-with-tags-and-without", + when: func(c *client) { + c.Log(LogError, "test", WithTags([]string{"key:value", "key2:value2"})) + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 2) + + slices.SortStableFunc(logs.Logs, func(i, j transport.LogMessage) int { + return strings.Compare(i.Tags, j.Tags) + }) + + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, 
"test", logs.Logs[0].Message) + assert.Equal(t, uint32(1), logs.Logs[0].Count) + assert.Empty(t, logs.Logs[0].Tags) + + assert.Equal(t, transport.LogLevelError, logs.Logs[1].Level) + assert.Equal(t, "test", logs.Logs[1].Message) + assert.Equal(t, uint32(1), logs.Logs[1].Count) + tags := strings.Split(logs.Logs[1].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + }, + }, + { + name: "single-log-with-stacktrace", + when: func(c *client) { + c.Log(LogError, "test", WithStacktrace()) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") + }, + }, + { + name: "single-log-with-stacktrace-and-tags", + when: func(c *client) { + c.Log(LogError, "test", WithStacktrace(), WithTags([]string{"key:value", "key2:value2"})) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") + tags := strings.Split(logs.Logs[0].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + + }, + }, + { + name: "multiple-logs-different-levels", + when: func(c *client) { + c.Log(LogError, "test") + c.Log(LogWarn, "test") + c.Log(LogDebug, "test") + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, 
logs.Logs, 3) + + slices.SortStableFunc(logs.Logs, func(i, j transport.LogMessage) int { + return strings.Compare(string(i.Level), string(j.Level)) + }) + + assert.Equal(t, transport.LogLevelDebug, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, uint32(1), logs.Logs[0].Count) + assert.Equal(t, transport.LogLevelError, logs.Logs[1].Level) + assert.Equal(t, "test", logs.Logs[1].Message) + assert.Equal(t, uint32(1), logs.Logs[1].Count) + assert.Equal(t, transport.LogLevelWarn, logs.Logs[2].Level) + assert.Equal(t, "test", logs.Logs[2].Message) + assert.Equal(t, uint32(1), logs.Logs[2].Count) + }, + }, + { + name: "simple-count", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, 1.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: "count-multiple-call-same-handle", + when: func(c *client) { + handle1 := c.Count(NamespaceTracers, "init_time", nil) + handle2 := c.Count(NamespaceTracers, "init_time", nil) + + handle2.Submit(1) + handle1.Submit(1) + handle1.Submit(3) + handle2.Submit(2) + handle2.Submit(10) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + 
assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, 17.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: "multiple-count-by-name", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time_1", nil).Submit(1) + c.Count(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64))), metrics.Series[0].Metric) + + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.CountMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64))), metrics.Series[1].Metric) + assert.Empty(t, metrics.Series[1].Tags) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "multiple-count-by-tags", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Count(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + + assert.Contains(t, 
metrics.Series[0].Tags, "test:"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.CountMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time", metrics.Series[1].Metric) + assert.Contains(t, metrics.Series[1].Tags, "test:"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "simple-gauge", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, 1.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: "multiple-gauge-by-name", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time_1", nil).Submit(1) + c.Gauge(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64))), metrics.Series[0].Metric) + + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, 
transport.GaugeMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64))), metrics.Series[1].Metric) + assert.Empty(t, metrics.Series[1].Tags) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "multiple-gauge-by-tags", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Gauge(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + + assert.Contains(t, metrics.Series[0].Tags, "test:"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time", metrics.Series[1].Metric) + assert.Contains(t, metrics.Series[1].Tags, "test:"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "simple-rate", + when: func(c *client) { + handle := c.Rate(NamespaceTracers, "init_time", nil) + handle.Submit(1) + + rate := handle.(*rate) + // So the rate is not +Infinity because the interval is zero + now := rate.intervalStart.Load() + sub := now.Add(-time.Second) + rate.intervalStart.Store(&sub) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := 
payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.RateMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Interval) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.LessOrEqual(t, metrics.Series[0].Points[0][1], 1.1) + }, + }, + { + name: "simple-distribution", + when: func(c *client) { + c.Distribution(NamespaceGeneral, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 1) + assert.Equal(t, NamespaceGeneral, distributions.Series[0].Namespace) + assert.Equal(t, "init_time", distributions.Series[0].Metric) + assert.Empty(t, distributions.Series[0].Tags) + require.Len(t, distributions.Series[0].Points, 1) + assert.Equal(t, 1.0, distributions.Series[0].Points[0]) + }, + }, + { + name: "multiple-distribution-by-name", + when: func(c *client) { + c.Distribution(NamespaceTracers, "init_time_1", nil).Submit(1) + c.Distribution(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 2) + + assert.Equal(t, "init_time_"+strconv.Itoa(int(distributions.Series[0].Points[0])), distributions.Series[0].Metric) + assert.Equal(t, "init_time_"+strconv.Itoa(int(distributions.Series[1].Points[0])), distributions.Series[1].Metric) + }, + }, + { + name: "multiple-distribution-by-tags", + when: func(c *client) { + c.Distribution(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + 
c.Distribution(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 2) + + assert.Contains(t, distributions.Series[0].Tags, "test:"+strconv.Itoa(int(distributions.Series[0].Points[0]))) + assert.Contains(t, distributions.Series[1].Tags, "test:"+strconv.Itoa(int(distributions.Series[1].Points[0]))) + }, + }, + { + name: "distribution-overflow", + when: func(c *client) { + handler := c.Distribution(NamespaceGeneral, "init_time", nil) + for i := 0; i < 1<<16; i++ { + handler.Submit(float64(i)) + } + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 1) + assert.Equal(t, NamespaceGeneral, distributions.Series[0].Namespace) + assert.Equal(t, "init_time", distributions.Series[0].Metric) + assert.Empty(t, distributions.Series[0].Tags) + + // Should not contain the first passed point + assert.NotContains(t, distributions.Series[0].Points, 0.0) + }, + }, + } + + for _, test := range testcases { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + config := defaultConfig(test.clientConfig) + config.AgentURL = "http://localhost:8126" + config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader + config.internalMetricsEnabled = test.clientConfig.internalMetricsEnabled // only enabled internal metrics when explicitly set + config.internalMetricsEnabled = false + c, err := newClient(tracerConfig, config) + require.NoError(t, err) + defer c.Close() + + recordWriter := &internal.RecordWriter{} + c.writer = recordWriter + + if test.when != nil { + test.when(c) + } + c.Flush() + + payloads := 
recordWriter.Payloads() + require.LessOrEqual(t, 1, len(payloads)) + test.expect(t, payloads) + }) + } +} + +func TestMetricsDisabled(t *testing.T) { + t.Setenv("DD_TELEMETRY_METRICS_ENABLED", "false") + + c, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"}) + require.NoError(t, err) + + recordWriter := &internal.RecordWriter{} + c.(*client).writer = recordWriter + + defer c.Close() + + assert.NotNil(t, c.Gauge(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Count(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Rate(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Distribution(NamespaceGeneral, "init_time", nil)) + + c.Flush() + + payloads := recordWriter.Payloads() + require.Len(t, payloads, 0) +} + +type testRoundTripper struct { + t *testing.T + roundTrip func(*http.Request) (*http.Response, error) + bodies []transport.Body +} + +func (t *testRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + defer req.Body.Close() + body, err := io.ReadAll(req.Body) + require.NoError(t.t, err) + req.Body = io.NopCloser(bytes.NewReader(body)) + t.bodies = append(t.bodies, parseRequest(t.t, req.Header, body)) + return t.roundTrip(req) +} + +func parseRequest(t *testing.T, headers http.Header, raw []byte) transport.Body { + t.Helper() + + assert.Equal(t, "v2", headers.Get("DD-Telemetry-API-Version")) + assert.Equal(t, "application/json", headers.Get("Content-Type")) + assert.Equal(t, "go", headers.Get("DD-Client-Library-Language")) + assert.Equal(t, "test-env", headers.Get("DD-Agent-Env")) + assert.Equal(t, version.Tag, headers.Get("DD-Client-Library-Version")) + assert.Equal(t, globalconfig.InstrumentationInstallID(), headers.Get("DD-Agent-Install-Id")) + assert.Equal(t, globalconfig.InstrumentationInstallType(), headers.Get("DD-Agent-Install-Type")) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), headers.Get("DD-Agent-Install-Time")) + + assert.NotEmpty(t, 
headers.Get("DD-Agent-Hostname")) + + var body transport.Body + require.NoError(t, json.Unmarshal(raw, &body)) + + assert.Equal(t, string(body.RequestType), headers.Get("DD-Telemetry-Request-Type")) + assert.Equal(t, "test-service", body.Application.ServiceName) + assert.Equal(t, "test-env", body.Application.Env) + assert.Equal(t, "1.0.0", body.Application.ServiceVersion) + assert.Equal(t, "go", body.Application.LanguageName) + assert.Equal(t, runtime.Version(), body.Application.LanguageVersion) + + assert.NotEmpty(t, body.Host.Hostname) + assert.Equal(t, osinfo.OSName(), body.Host.OS) + assert.Equal(t, osinfo.OSVersion(), body.Host.OSVersion) + assert.Equal(t, osinfo.Architecture(), body.Host.Architecture) + assert.Equal(t, osinfo.KernelName(), body.Host.KernelName) + assert.Equal(t, osinfo.KernelRelease(), body.Host.KernelRelease) + assert.Equal(t, osinfo.KernelVersion(), body.Host.KernelVersion) + + assert.Equal(t, "v2", body.APIVersion) + assert.NotZero(t, body.TracerTime) + assert.LessOrEqual(t, int64(1), body.SeqID) + assert.Equal(t, globalconfig.RuntimeID(), body.RuntimeID) + + return body +} + +func TestClientEnd2End(t *testing.T) { + tracerConfig := internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + } + + for _, test := range []struct { + name string + when func(*client) + roundtrip func(*testing.T, *http.Request) (*http.Response, error) + expect func(*testing.T, []transport.Body) + }{ + { + name: "app-start", + when: func(c *client) { + c.AppStart() + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + }, + }, + { + name: "app-stop", + when: func(c *client) { + c.AppStop() + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeAppClosing, bodies[0].RequestType) + }, + }, + { + name: "app-start+app-stop", + when: func(c *client) { + 
c.AppStart() + c.AppStop() + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + assert.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + assert.Equal(t, transport.RequestTypeAppClosing, bodies[1].RequestType) + }, + }, + { + name: "message-batch", + when: func(c *client) { + c.RegisterAppConfig("key", "value", OriginCode) + c.ProductStarted(NamespaceAppSec) + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeMessageBatch, bodies[0].RequestType) + batch := bodies[0].Payload.(transport.MessageBatch) + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppProductChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch[1].RequestType) + }, + }, + { + name: "fail-agent-endpoint", + when: func(c *client) { + c.AppStart() + }, + roundtrip: func(_ *testing.T, req *http.Request) (*http.Response, error) { + if strings.Contains(req.URL.Host, "localhost") { + return nil, errors.New("failed") + } + return &http.Response{StatusCode: http.StatusOK}, nil + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + require.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + require.Equal(t, transport.RequestTypeAppStarted, bodies[1].RequestType) + }, + }, + { + name: "fail-all-endpoint", + when: func(c *client) { + c.AppStart() + }, + roundtrip: func(_ *testing.T, _ *http.Request) (*http.Response, error) { + return nil, errors.New("failed") + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + require.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + require.Equal(t, transport.RequestTypeAppStarted, bodies[1].RequestType) + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + rt := &testRoundTripper{ + t: t, + roundTrip: func(req *http.Request) (*http.Response, error) { + if 
test.roundtrip != nil { + return test.roundtrip(t, req) + } + return &http.Response{StatusCode: http.StatusOK}, nil + }, + } + clientConfig := ClientConfig{ + AgentURL: "http://localhost:8126", + APIKey: "apikey", + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: rt, + }, + Debug: true, + } + + clientConfig = defaultConfig(clientConfig) + clientConfig.DependencyLoader = nil + + c, err := newClient(tracerConfig, clientConfig) + require.NoError(t, err) + defer c.Close() + + test.when(c) + c.Flush() + test.expect(t, rt.bodies) + }) + } +} + +func TestHeartBeatInterval(t *testing.T) { + startTime := time.Now() + payloadtimes := make([]time.Duration, 0, 32) + c, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{ + AgentURL: "http://localhost:8126", + HeartbeatInterval: 2 * time.Second, + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + t: t, + roundTrip: func(_ *http.Request) (*http.Response, error) { + payloadtimes = append(payloadtimes, time.Since(startTime)) + startTime = time.Now() + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + }, + }) + require.NoError(t, err) + defer c.Close() + + for i := 0; i < 10; i++ { + c.Log(LogError, "test") + time.Sleep(1 * time.Second) + } + + // 10 seconds have passed, we should have sent 5 heartbeats + + c.Flush() + c.Close() + + require.InDelta(t, 5, len(payloadtimes), 1) + sum := 0.0 + for _, d := range payloadtimes { + sum += d.Seconds() + } + + assert.InDelta(t, 2, sum/5, 0.1) +} + +func TestSendingFailures(t *testing.T) { + cfg := ClientConfig{ + AgentURL: "http://localhost:8126", + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + t: t, + roundTrip: func(_ *http.Request) (*http.Response, error) { + return nil, errors.New("failed") + }, + }, + }, + } + + c, err := newClient(internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, defaultConfig(cfg)) + + 
require.NoError(t, err) + defer c.Close() + + c.Log(LogError, "test") + c.Flush() + + require.False(t, c.payloadQueue.IsEmpty()) + payload := c.payloadQueue.ReversePeek() + require.NotNil(t, payload) + + assert.Equal(t, transport.RequestTypeLogs, payload.RequestType()) + logs := payload.(transport.Logs) + assert.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) +} + +func BenchmarkLogs(b *testing.B) { + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + AgentURL: "http://localhost:8126", + } + + b.Run("simple", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length with a variable message: "+strconv.Itoa(i%10)) + } + }) + + b.Run("with-tags", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogWarn, "this is supposed to be a WARN log of representative length", WithTags([]string{"key:" + strconv.Itoa(i%10)})) + } + }) + + b.Run("with-stacktrace", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogError, "this is supposed to be a ERROR log of representative length", WithStacktrace()) + } + }) +} + +type noopTransport struct{} + +func (noopTransport) RoundTrip(*http.Request) (*http.Response, error) { + return &http.Response{StatusCode: http.StatusOK}, nil +} + +func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { + b.ReportAllocs() + nbSameLogs := 10 + nbDifferentLogs := 100 
+ nbGoroutines := 25 + + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + FlushInterval: internal.Range[time.Duration]{Min: time.Second, Max: time.Second}, + AgentURL: "http://localhost:8126", + + // Empty transport to avoid sending data to the agent + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: noopTransport{}, + }, + } + + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentLogs; j++ { + for k := 0; k < nbSameLogs; k++ { + c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length"+strconv.Itoa(i), WithTags([]string{"key:" + strconv.Itoa(j)})) + } + } + }() + } + + wg.Wait() + } + + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentLogs*nbSameLogs*b.N)), "ns/log") +} + +func BenchmarkMetrics(b *testing.B) { + b.ReportAllocs() + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + AgentURL: "http://localhost:8126", + } + + b.Run("count+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Count(NamespaceGeneral, "logs_created", nil).Submit(1) + } + }) + + b.Run("count+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Count(NamespaceGeneral, "logs_created", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("gauge+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := 
NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Gauge(NamespaceTracers, "stats_buckets", nil).Submit(1) + } + }) + + b.Run("gauge+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Gauge(NamespaceTracers, "stats_buckets", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("rate+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Rate(NamespaceTracers, "init_time", nil).Submit(1) + } + }) + + b.Run("rate+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Rate(NamespaceTracers, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("distribution+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Distribution(NamespaceGeneral, "init_time", nil).Submit(1) + } + }) + + b.Run("distribution+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Distribution(NamespaceGeneral, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) +} + +func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { + nbSameMetric := 1000 + nbDifferentMetrics := 50 + nbGoroutines := 50 + + clientConfig := 
ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + FlushInterval: internal.Range[time.Duration]{Min: time.Second, Max: time.Second}, + DistributionsSize: internal.Range[int]{Min: 256, Max: -1}, + AgentURL: "http://localhost:8126", + + // Empty transport to avoid sending data to the agent + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: noopTransport{}, + }, + } + + b.Run("get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentMetrics; j++ { + metricName := "init_time_" + strconv.Itoa(j) + for k := 0; k < nbSameMetric; k++ { + c.Count(NamespaceGeneral, metricName, []string{"test:1"}).Submit(1) + } + } + }() + } + + wg.Wait() + } + + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentMetrics*nbSameMetric*b.N)), "ns/point") + }) + + b.Run("handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + handles := make([]MetricHandle, nbDifferentMetrics) + for i := 0; i < nbDifferentMetrics; i++ { + handles[i] = c.Count(NamespaceGeneral, "init_time_"+strconv.Itoa(i), []string{"test:1"}) + } + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentMetrics; j++ { + for k := 0; k < nbSameMetric; k++ { + handles[j].Submit(1) + } + } + }() + } + + wg.Wait() + } + + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentMetrics*nbSameMetric*b.N)), "ns/point") + }) + +} diff --git a/internal/newtelemetry/configuration.go 
b/internal/newtelemetry/configuration.go new file mode 100644 index 0000000000..3cdbe76477 --- /dev/null +++ b/internal/newtelemetry/configuration.go @@ -0,0 +1,156 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "encoding/json" + "fmt" + "math" + "reflect" + "slices" + "strings" + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type configuration struct { + mu sync.Mutex + config map[string]transport.ConfKeyValue + seqID uint64 +} + +func (c *configuration) Add(kv Configuration) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.config == nil { + c.config = make(map[string]transport.ConfKeyValue) + } + + c.config[kv.Name] = transport.ConfKeyValue{ + Name: kv.Name, + Value: kv.Value, + Origin: kv.Origin, + } +} + +func (c *configuration) Payload() transport.Payload { + c.mu.Lock() + defer c.mu.Unlock() + if len(c.config) == 0 { + return nil + } + + configs := make([]transport.ConfKeyValue, len(c.config)) + idx := 0 + for _, conf := range c.config { + if conf.Origin == "" { + conf.Origin = transport.OriginDefault + } + conf.Value = SanitizeConfigValue(conf.Value) + conf.SeqID = c.seqID + configs[idx] = conf + idx++ + c.seqID++ + delete(c.config, conf.Name) + } + + return transport.AppClientConfigurationChange{ + Configuration: configs, + } +} + +// SanitizeConfigValue sanitizes the value of a configuration key to ensure it can be marshalled. 
func SanitizeConfigValue(value any) any {
	// Rules, applied in order:
	//   - nil becomes the empty string;
	//   - strings, booleans and integers are returned untouched;
	//   - non-finite floats are replaced ("" for float32, nil for float64),
	//     finite floats are returned untouched;
	//   - []string is joined with commas;
	//   - json.Marshaler values are returned as-is;
	//   - fmt.Stringer values are replaced by their String() result;
	//   - after unwrapping pointers and interfaces, slices and arrays become
	//     "[a b c]" and maps become "k1:v1,k2:v2" sorted by key;
	//   - anything else is formatted with "%v".
	if value == nil {
		return ""
	}

	// Skip reflection for basic types
	switch val := value.(type) {
	case string, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
		return val
	case float32:
		if f := float64(val); math.IsNaN(f) || math.IsInf(f, 0) {
			return ""
		}
		return val
	case float64:
		// NaN and ±Inf cannot be encoded by encoding/json:
		// https://github.com/golang/go/issues/59627
		if math.IsNaN(val) || math.IsInf(val, 0) {
			return nil
		}
		return val
	case []string:
		return strings.Join(val, ",") // Retro compatibility with old code
	}

	if _, ok := value.(json.Marshaler); ok {
		return value
	}

	if s, ok := value.(fmt.Stringer); ok {
		return stringerValue(s)
	}

	rv := reflect.ValueOf(value)

	// Unwrap pointers and interfaces up to 10 levels deep. A nil pointer
	// unwraps to the invalid Value, which ends up in the "%v" fallback below.
	for depth := 0; depth < 10; depth++ {
		if k := rv.Kind(); k != reflect.Ptr && k != reflect.Interface {
			break
		}
		rv = rv.Elem()
	}

	switch rv.Kind() {
	case reflect.Slice, reflect.Array:
		var sb strings.Builder
		sb.WriteString("[")
		for i := 0; i < rv.Len(); i++ {
			if i > 0 {
				sb.WriteString(" ")
			}
			fmt.Fprintf(&sb, "%v", rv.Index(i).Interface())
		}
		sb.WriteString("]")
		return sb.String()

	case reflect.Map:
		type pair struct {
			key   string
			value string
		}

		pairs := make([]pair, 0, rv.Len())
		for iter := rv.MapRange(); iter.Next(); {
			pairs = append(pairs, pair{
				key:   fmt.Sprintf("%v", iter.Key().Interface()),
				value: fmt.Sprintf("%v", iter.Value().Interface()),
			})
		}

		// Map iteration order is random: sort by key for a deterministic output.
		slices.SortStableFunc(pairs, func(a, b pair) int {
			return strings.Compare(a.key, b.key)
		})

		var sb strings.Builder
		for i, p := range pairs {
			if i > 0 {
				sb.WriteString(",")
			}
			sb.WriteString(p.key)
			sb.WriteString(":")
			sb.WriteString(p.value)
		}
		return sb.String()
	}

	return fmt.Sprintf("%v", value)
}

// stringerValue returns v.String(). If the call panics because v holds a nil
// pointer receiver, it returns "<nil>" (the same output the fmt package
// produces in that situation) instead of propagating the panic. Any other
// panic is re-raised unchanged.
func stringerValue(v fmt.Stringer) (s string) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if rv := reflect.ValueOf(v); rv.Kind() == reflect.Ptr && rv.IsNil() {
			s = "<nil>"
			return
		}
		panic(r)
	}()
	return v.String()
}
a/internal/newtelemetry/dependencies.go b/internal/newtelemetry/dependencies.go new file mode 100644 index 0000000000..10e2bee3d3 --- /dev/null +++ b/internal/newtelemetry/dependencies.go @@ -0,0 +1,94 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "runtime/debug" + "strings" + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type dependencies struct { + DependencyLoader func() (*debug.BuildInfo, bool) + + once sync.Once + + mu sync.Mutex + payloads []transport.Payload +} + +func (d *dependencies) Payload() transport.Payload { + d.once.Do(func() { + deps := d.loadDeps() + // Requirement described here: + // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-dependencies-loaded + const maxPerPayload = 2000 + if len(deps) > maxPerPayload { + log.Debug("telemetry: too many (%d) dependencies to send, sending over multiple bodies", len(deps)) + } + + for i := 0; i < len(deps); i += maxPerPayload { + end := min(i+maxPerPayload, len(deps)) + + d.payloads = append(d.payloads, transport.AppDependenciesLoaded{ + Dependencies: deps[i:end], + }) + } + }) + + d.mu.Lock() + defer d.mu.Unlock() + + if len(d.payloads) == 0 { + return nil + } + + // return payloads one by one + payloadZero := d.payloads[0] + if len(d.payloads) == 1 { + d.payloads = nil + } + + if len(d.payloads) > 1 { + d.payloads = d.payloads[1:] + } + + return payloadZero +} + +// loadDeps returns the dependencies from the DependencyLoader, formatted for telemetry intake. 
+func (d *dependencies) loadDeps() []transport.Dependency { + if d.DependencyLoader == nil { + return nil + } + + deps, ok := d.DependencyLoader() + if !ok { + log.Debug("telemetry: could not read build info, no dependencies will be reported") + return nil + } + + transportDeps := make([]transport.Dependency, 0, len(deps.Deps)) + for _, dep := range deps.Deps { + if dep == nil { + continue + } + + if dep.Replace != nil && dep.Replace.Version != "" { + dep = dep.Replace + } + + transportDeps = append(transportDeps, transport.Dependency{ + Name: dep.Path, + Version: strings.TrimPrefix(dep.Version, "v"), + }) + } + + return transportDeps +} diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go new file mode 100644 index 0000000000..a556936731 --- /dev/null +++ b/internal/newtelemetry/distributions.go @@ -0,0 +1,90 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type distributions struct { + store internal.SyncMap[metricKey, *distribution] + pool *internal.SyncPool[[]float64] + queueSize internal.Range[int] + skipAllowlist bool // Debugging feature to skip the allowlist of known metrics +} + +// LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. 
+func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []string) MetricHandle { + kind := transport.DistMetric + key := newMetricKey(namespace, kind, name, tags) + handle, loaded := d.store.LoadOrStore(key, &distribution{ + key: key, + values: internal.NewRingQueueWithPool[float64](d.queueSize, d.pool), + }) + if !loaded { // The metric is new: validate and log issues about it + if err := validateMetricKey(namespace, kind, name, tags); err != nil { + log.Warn("telemetry: %v", err) + } + } + + return handle +} + +func (d *distributions) Payload() transport.Payload { + series := make([]transport.DistributionSeries, 0, d.store.Len()) + d.store.Range(func(_ metricKey, handle *distribution) bool { + if payload := handle.payload(); payload.Namespace != "" { + series = append(series, payload) + } + return true + }) + + if len(series) == 0 { + return nil + } + + return transport.Distributions{Series: series, SkipAllowlist: d.skipAllowlist} +} + +type distribution struct { + key metricKey + values *internal.RingQueue[float64] + + logLoss sync.Once +} + +func (d *distribution) Submit(value float64) { + if !d.values.Enqueue(value) { + d.logLoss.Do(func() { + log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) + }) + } +} + +func (d *distribution) Get() float64 { + return d.values.ReversePeek() +} + +func (d *distribution) payload() transport.DistributionSeries { + if d.values.IsEmpty() { + return transport.DistributionSeries{} + } + + data := transport.DistributionSeries{ + Metric: d.key.name, + Namespace: d.key.namespace, + Tags: d.key.SplitTags(), + Common: knownmetrics.IsCommonMetric(d.key.namespace, d.key.kind, d.key.name), + Points: d.values.Flush(), + } + + return data +} diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go new file mode 100644 index 0000000000..efd156bb0c --- /dev/null +++ b/internal/newtelemetry/globalclient.go @@ -0,0 +1,265 @@ +// Unless explicitly 
stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "sync" + "sync/atomic" + + globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +var ( + globalClient atomic.Pointer[Client] + + // globalClientRecorder contains all actions done on the global client done before StartApp() with an actual client object is called + globalClientRecorder = internal.NewRecorder[Client]() + + // metricsHandleSwappablePointers contains all the swappableMetricHandle, used to replay actions done before the actual MetricHandle is set + metricsHandleSwappablePointers internal.SyncMap[metricKey, *swappableMetricHandle] +) + +// GlobalClient returns the global telemetry client. +func GlobalClient() Client { + client := globalClient.Load() + if client == nil { + return nil + } + return *client +} + +// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client) +// then calls client.Flush on the client asynchronously. +func StartApp(client Client) { + if Disabled() { + return + } + + if GlobalClient() != nil { + log.Debug("telemetry: StartApp called multiple times, ignoring") + return + } + + SwapClient(client) + + client.AppStart() + + go func() { + client.Flush() + log.Debug("telemetry: sucessfully flushed the telemetry app-started payload") + }() +} + +// SwapClient swaps the global client with the given client and Flush the old (*client). 
+func SwapClient(client Client) { + if Disabled() { + return + } + + if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { + (*oldClient).Close() + } + + if client == nil { + return + } + + globalClientRecorder.Replay(client) + // Swap all metrics hot pointers to the new MetricHandle + metricsHandleSwappablePointers.Range(func(_ metricKey, value *swappableMetricHandle) bool { + value.swap(value.maker(client)) + return true + }) +} + +// MockClient swaps the global client with the given client and clears the recorder to make sure external calls are not replayed. +// It returns a function that can be used to swap back the global client +func MockClient(client Client) func() { + globalClientRecorder.Clear() + metricsHandleSwappablePointers.Range(func(key metricKey, _ *swappableMetricHandle) bool { + metricsHandleSwappablePointers.Delete(key) + return true + }) + + oldClient := GlobalClient() + SwapClient(client) + return func() { + SwapClient(oldClient) + } +} + +// StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). +func StopApp() { + if client := globalClient.Swap(nil); client != nil && *client != nil { + (*client).AppStop() + (*client).Flush() + (*client).Close() + } +} + +var telemetryClientDisabled = !globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) + +// Disabled returns whether instrumentation telemetry is disabled +// according to the DD_INSTRUMENTATION_TELEMETRY_ENABLED env var +func Disabled() bool { + return telemetryClientDisabled +} + +// Count creates a new metric handle for the given parameters that can be used to submit values. +// Count will always return a [MetricHandle], even if telemetry is disabled or the client has yet to start. +// The [MetricHandle] is then swapped with the actual [MetricHandle] once the client is started. 
+func Count(namespace Namespace, name string, tags []string) MetricHandle {
+	return globalClientNewMetric(namespace, transport.CountMetric, name, tags)
+}
+
+// Rate returns a rate metric handle for the given namespace, name and tags.
+// A [MetricHandle] is always returned, even if telemetry is disabled or the client has yet to start;
+// it is swapped with the actual [MetricHandle] once the client is started.
+func Rate(namespace Namespace, name string, tags []string) MetricHandle {
+	return globalClientNewMetric(namespace, transport.RateMetric, name, tags)
+}
+
+// Gauge returns a gauge metric handle for the given namespace, name and tags.
+// A [MetricHandle] is always returned, even if telemetry is disabled or the client has yet to start;
+// it is swapped with the actual [MetricHandle] once the client is started.
+func Gauge(namespace Namespace, name string, tags []string) MetricHandle {
+	return globalClientNewMetric(namespace, transport.GaugeMetric, name, tags)
+}
+
+// Distribution returns a distribution metric handle for the given namespace, name and tags.
+// A [MetricHandle] is always returned, even if telemetry is disabled or the client has yet to start;
+// it is swapped with the actual [MetricHandle] once the client is started.
+// The Get() method of the [MetricHandle] will return the last value submitted.
+// Callers are advised to keep a Distribution handle in a variable, more so than for the other metric types,
+// to avoid too many useless allocations.
+func Distribution(namespace Namespace, name string, tags []string) MetricHandle {
+	return globalClientNewMetric(namespace, transport.DistMetric, name, tags)
+}
+
+// Log forwards a log with the given level, text and options to the telemetry client. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func Log(level LogLevel, text string, options ...LogOption) {
+	globalClientCall(func(c Client) {
+		c.Log(level, text, options...)
+	})
+}
+
+// ProductStarted declares a product as started at the customer’s request. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func ProductStarted(product Namespace) {
+	globalClientCall(func(c Client) {
+		c.ProductStarted(product)
+	})
+}
+
+// ProductStopped declares a product as having been stopped by the customer. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func ProductStopped(product Namespace) {
+	globalClientCall(func(c Client) {
+		c.ProductStopped(product)
+	})
+}
+
+// ProductStartError declares that a product could not start because of the given error. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func ProductStartError(product Namespace, err error) {
+	globalClientCall(func(c Client) {
+		c.ProductStartError(product, err)
+	})
+}
+
+// RegisterAppConfig adds a key value pair to the app configuration and sends the change to telemetry.
+// The value has to be JSON serializable and the origin is the source of the change. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func RegisterAppConfig(key string, value any, origin Origin) {
+	globalClientCall(func(c Client) {
+		c.RegisterAppConfig(key, value, origin)
+	})
+}
+
+// RegisterAppConfigs adds a list of key value pairs to the app configuration and sends the change to telemetry.
+// Same as [RegisterAppConfig] but for multiple values. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func RegisterAppConfigs(kvs ...Configuration) {
+	globalClientCall(func(client Client) {
+		client.RegisterAppConfigs(kvs...)
+	})
+}
+
+// MarkIntegrationAsLoaded marks an integration as loaded in the telemetry. If telemetry is disabled, it will do nothing.
+// If the telemetry client has not started yet, it will record the action and replay it once the client is started.
+func MarkIntegrationAsLoaded(integration Integration) {
+	globalClientCall(func(client Client) {
+		client.MarkIntegrationAsLoaded(integration)
+	})
+}
+
+// LoadIntegration marks the integration with the given name as loaded in the telemetry client.
+// It is a shorthand for [MarkIntegrationAsLoaded] with only the Name field set, so the same rules apply:
+// if telemetry is disabled, it will do nothing; if the telemetry client has not started yet,
+// it will record the action and replay it once the client is started.
+func LoadIntegration(integration string) {
+	MarkIntegrationAsLoaded(Integration{Name: integration})
+}
+
+// globalClientLogLossOnce makes sure the "recorder queue is full" message is logged at most once.
+var globalClientLogLossOnce sync.Once
+
+// globalClientCall takes a function that takes a Client and calls it with the global client if it exists.
+// otherwise, it records the action for when the client is started.
+func globalClientCall(fun func(client Client)) {
+	if Disabled() {
+		return
+	}
+
+	client := globalClient.Load()
+	if client != nil && *client != nil {
+		fun(*client)
+		return
+	}
+
+	// No client yet: keep the action for later. Record reports false when the action could not be stored.
+	if !globalClientRecorder.Record(fun) {
+		globalClientLogLossOnce.Do(func() {
+			log.Debug("telemetry: global client recorder queue is full, dropping telemetry data, please start the telemetry client earlier to avoid data loss")
+		})
+	}
+}
+
+// noopMetricHandleInstance is the handle returned whenever no real metric handle can be produced.
+var noopMetricHandleInstance = noopMetricHandle{}
+
+// globalClientNewMetric returns the swappable handle registered for the given namespace, kind, name and tags,
+// creating and registering it on first use. If telemetry is disabled, the no-op handle is returned instead.
+// On first registration, the real handle is created through globalClientCall: immediately when a client
+// exists, or once a client is started otherwise.
+func globalClientNewMetric(namespace Namespace, kind transport.MetricType, name string, tags []string) MetricHandle {
+	if Disabled() {
+		return noopMetricHandleInstance
+	}
+
+	// maker builds the real handle from a started client. It is kept on the wrapper so that the handle
+	// can be rebuilt whenever the global client is swapped.
+	maker := func(client Client) MetricHandle {
+		switch kind {
+		case transport.CountMetric:
+			return client.Count(namespace, name, tags)
+		case transport.RateMetric:
+			return client.Rate(namespace, name, tags)
+		case transport.GaugeMetric:
+			return client.Gauge(namespace, name, tags)
+		case transport.DistMetric:
+			return client.Distribution(namespace, name, tags)
+		}
+		log.Warn("telemetry: unknown metric type %q", kind)
+		// Never hand a nil MetricHandle to swap: fall back to the no-op handle so that submitting
+		// to a metric of an unknown type is silently ignored.
+		return noopMetricHandleInstance
+	}
+
+	key := newMetricKey(namespace, kind, name, tags)
+	wrapper := &swappableMetricHandle{maker: maker}
+	if client := globalClient.Load(); client == nil || *client == nil {
+		// No client yet: give the wrapper a recorder of its own.
+		wrapper.recorder = internal.NewRecorder[MetricHandle]()
+	}
+
+	hotPtr, loaded := metricsHandleSwappablePointers.LoadOrStore(key, wrapper)
+	if !loaded {
+		// First registration of this key: create the real handle now, or as soon as a client is started.
+		globalClientCall(func(client Client) {
+			hotPtr.swap(maker(client))
+		})
+	}
+	return hotPtr
+}
diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go
new file mode 100644
index 0000000000..d77ef116cd
--- /dev/null
+++ b/internal/newtelemetry/integration.go
@@ -0,0 +1,41 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package newtelemetry
+
+import (
+	"sync"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport"
+)
+
+// integrations accumulates the integrations added since the last call to Payload.
+type integrations struct {
+	mu           sync.Mutex
+	integrations []transport.Integration
+}
+
+// Add queues the given integration for the next payload.
+func (i *integrations) Add(integration Integration) {
+	entry := transport.Integration{
+		Name:    integration.Name,
+		Version: integration.Version,
+		Enabled: integration.Error == "", // no error means the integration was enabled successfully
+		Error:   integration.Error,
+	}
+
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.integrations = append(i.integrations, entry)
+}
+
+// Payload returns the queued integrations as a [transport.AppIntegrationChange] and empties the queue.
+// It returns nil when nothing is queued.
+func (i *integrations) Payload() transport.Payload {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	pending := i.integrations
+	if len(pending) == 0 {
+		return nil
+	}
+
+	i.integrations = nil
+	return transport.AppIntegrationChange{
+		Integrations: pending,
+	}
+}
diff --git a/internal/newtelemetry/internal/knownmetrics/common_metrics.json b/internal/newtelemetry/internal/knownmetrics/common_metrics.json
new file mode 100644
index 0000000000..8f79ae690d
--- /dev/null
+++ b/internal/newtelemetry/internal/knownmetrics/common_metrics.json
@@ -0,0 +1,877 @@
+[
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "instrum.user_auth.missing_user_id"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "instrum.user_auth.missing_user_login"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "rasp.rule.eval"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "rasp.rule.match"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "rasp.timeout"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "waf.config_errors"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "waf.init"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "waf.input_truncated"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "waf.requests"
+  },
+  {
+    "namespace": "appsec",
+    "type": "count",
+    "name": "waf.updates"
+  },
+  {
+    "namespace": 
"appsec", + "type": "distribution", + "name": "waf.duration" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.duration_ext" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.truncated_value_size" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.is_empty" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_started" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.dropped" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_created" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "events_enqueued_for_serialization" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack_errors" + }, + { + "namespace": 
"civisibility", + "type": "count", + "name": "git_requests.search_commits" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.search_commits_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_response" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_forced_run" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_suites" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_tests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skipped" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_unskippable" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "manual_api_events" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "test_session" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "code_coverage.files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": 
"distribution", + "name": "early_flake_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.response_tests" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_count" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_serialization_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.requests_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_tests" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git.command_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.search_commits_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.settings_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": 
"itr_skippable_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "itr_skippable_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.response_tests" + }, + { + "namespace": "general", + "type": "count", + "name": "logs_created" + }, + { + "namespace": "general", + "type": "distribution", + "name": "init_time" + }, + { + "namespace": "general", + "type": "distribution", + "name": "tracer_init_time" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.source" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.source" + }, + { + "namespace": "iast", + "type": "count", + "name": "json.tag.size.exceeded" + }, + { + "namespace": "iast", + "type": "count", + "name": "request.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "suppressed.vulnerabilities" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.error" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.init" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.run" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "span.start" + }, + { + "namespace": "mlobs", + "type": "distribution", + "name": "evaluators.rule_sample_rate" + }, + { + "namespace": "profilers", + "type": 
"count", + "name": "profile_api.errors" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.requests" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.responses" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_profiles" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_runtime_id" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.bytes" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.ms" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.content_security_policy" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.initialization.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.initialization.succeed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.installation" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.skipped" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.succeed" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.installation.duration" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.ms" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.response.bytes" + }, + { + "namespace": "sidecar", + "type": "count", + "name": "server.submitted_payloads" + }, + { + "namespace": "sidecar", + "type": "distribution", + "name": "server.memory_usage" + }, + { + "namespace": "sidecar", + "type": "gauge", + "name": "server.active_sessions" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.errors" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.requests" + }, + { + "namespace": "telemetry", + "type": 
"count", + "name": "telemetry_api.responses" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.bytes" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.ms" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header.truncated" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.extracted" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.injected" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "exporter_fallback" + }, + { + "namespace": "tracers", + "type": "count", + "name": "host_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "host_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.language_detection" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.skip" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "integration_errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.integration" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.runtime" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.complete" + }, + { + "namespace": "tracers", + 
"type": "count", + "name": "library_entrypoint.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.injector.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.hiding" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.invalid" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.unsupported" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation.issue" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_dropped" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_dropped" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_sent" + }, + { + "namespace": 
"tracers", + "type": "count", + "name": "trace_partial_flush.count" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_closed" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_created" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.baseline" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.end_to_end" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.init_container" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_serialization.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_serialization.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_size" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_closed" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_remaining" + }, + { + "namespace": "tracers", + "type": "gauge", + "name": "stats_buckets" + } +] diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go new file mode 100644 index 0000000000..1cdf13ff38 --- /dev/null +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -0,0 +1,166 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package main
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"slices"
+	"strings"
+	"time"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport"
+)
+
+// This represents the base64-encoded URL of api.github.com to download the configuration file.
+// This can be easily decoded manually, but it is encoded to prevent the URL from being scanned by bots.
+const (
+	commonMetricsURL = "aHR0cHM6Ly9hcGkuZ2l0aHViLmNvbS9yZXBvcy9EYXRhRG9nL2RkLWdvL2NvbnRlbnRzL3RyYWNlL2FwcHMvdHJhY2VyLXRlbGVtZXRyeS1pbnRha2UvdGVsZW1ldHJ5LW1ldHJpY3Mvc3RhdGljL2NvbW1vbl9tZXRyaWNzLmpzb24="
+	goMetricsURL     = "aHR0cHM6Ly9hcGkuZ2l0aHViLmNvbS9yZXBvcy9EYXRhRG9nL2RkLWdvL2NvbnRlbnRzL3RyYWNlL2FwcHMvdHJhY2VyLXRlbGVtZXRyeS1pbnRha2UvdGVsZW1ldHJ5LW1ldHJpY3Mvc3RhdGljL2dvbGFuZ19tZXRyaWNzLmpzb24="
+)
+
+// httpClient bounds every download so that the generator cannot hang forever on a stalled connection.
+var httpClient = &http.Client{Timeout: 30 * time.Second}
+
+// base64Decode decodes one of the base64 constants of this file.
+// The inputs are compile-time constants, so a decoding failure is a programming error and panics.
+func base64Decode(encoded string) string {
+	decoded, err := base64.StdEncoding.DecodeString(encoded)
+	if err != nil {
+		panic(fmt.Errorf("decoding base64 constant: %w", err))
+	}
+	return string(decoded)
+}
+
+// downloadFromDdgo downloads the JSON document at remoteURL for the given branch, authenticating with token,
+// extracts the metric declarations from it with getMetricNames, sorts them by namespace, type and name,
+// and writes them as indented JSON to localPath.
+func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []knownmetrics.Declaration) error {
+	request, err := http.NewRequest(http.MethodGet, remoteURL, nil)
+	if err != nil {
+		return err
+	}
+
+	// Following the documentation described here:
+	// https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28
+
+	request.Header.Set("Authorization", "Bearer "+token)
+	request.Header.Set("Accept", "application/vnd.github.v3.raw")
+	request.Header.Set("X-GitHub-Api-Version", "2022-11-28")
+
+	// URL.Query returns a copy of the query values: they have to be encoded back into RawQuery,
+	// otherwise the requested branch is silently dropped from the request.
+	query := request.URL.Query()
+	query.Set("ref", branch)
+	request.URL.RawQuery = query.Encode()
+
+	response, err := httpClient.Do(request)
+	if err != nil {
+		return err
+	}
+
+	defer response.Body.Close()
+
+	if response.StatusCode != http.StatusOK {
+		return fmt.Errorf("unexpected status code: %s", response.Status)
+	}
+
+	var decoded map[string]any
+
+	if err := json.NewDecoder(response.Body).Decode(&decoded); err != nil {
+		return err
+	}
+
+	metricNames := getMetricNames(decoded)
+	slices.SortStableFunc(metricNames, func(i, j knownmetrics.Declaration) int {
+		if i.Namespace != j.Namespace {
+			return strings.Compare(string(i.Namespace), string(j.Namespace))
+		}
+		if i.Type != j.Type {
+			return strings.Compare(string(i.Type), string(j.Type))
+		}
+		return strings.Compare(i.Name, j.Name)
+	})
+
+	fp, err := os.Create(localPath)
+	if err != nil {
+		return err
+	}
+
+	encoder := json.NewEncoder(fp)
+	encoder.SetIndent("", " ")
+	if err := encoder.Encode(metricNames); err != nil {
+		fp.Close() // the encoding error is the one worth reporting
+		return err
+	}
+
+	// Close errors matter for a file that was just written: they can hide a failed flush.
+	return fp.Close()
+}
+
+// getCommonMetricNames flattens the common metrics document, a map of namespace to a map of metric name
+// to metric description, into a list of declarations. Every alias of a metric is declared as a metric
+// of its own. Top-level keys starting with "$" are skipped.
+// It panics if the document does not have the expected shape.
+func getCommonMetricNames(input map[string]any) []knownmetrics.Declaration {
+	var names []knownmetrics.Declaration
+	for category, value := range input {
+		if strings.HasPrefix(category, "$") {
+			continue
+		}
+
+		metrics := value.(map[string]any)
+		for metricKey, value := range metrics {
+			description := value.(map[string]any)
+			metric := knownmetrics.Declaration{
+				Namespace: transport.Namespace(category),
+				Name:      metricKey,
+				Type:      transport.MetricType(description["metric_type"].(string)),
+			}
+			names = append(names, metric)
+
+			aliases, ok := description["aliases"]
+			if !ok {
+				continue
+			}
+			for _, alias := range aliases.([]any) {
+				metric.Name = alias.(string)
+				names = append(names, metric)
+			}
+		}
+	}
+	return names
+}
+
+// getGoMetricNames flattens the Go metrics document, a map of metric name to metric description,
+// into a list of declarations that have no namespace. Top-level keys starting with "$" are skipped.
+// It panics if the document does not have the expected shape.
+func getGoMetricNames(input map[string]any) []knownmetrics.Declaration {
+	var names []knownmetrics.Declaration
+	for key, value := range input {
+		if strings.HasPrefix(key, "$") {
+			continue
+		}
+		names = append(names, knownmetrics.Declaration{
+			Name: key,
+			Type: transport.MetricType(value.(map[string]any)["metric_type"].(string)),
+		})
+	}
+	return names
+}
+
+// main regenerates common_metrics.json and golang_metrics.json in the parent directory of this file.
+// The GitHub token is read from GITHUB_TOKEN, or obtained from `gh auth token` when the variable is not set.
+func main() {
+	branch := flag.String("branch", "prod", "The branch to get the configuration from")
+	flag.Parse()
+
+	githubToken := os.Getenv("GITHUB_TOKEN")
+	if githubToken == "" {
+		if _, err := exec.LookPath("gh"); err != nil {
+			fmt.Fprintln(os.Stderr, "Please specify a GITHUB_TOKEN environment variable or install the GitHub CLI.")
+			os.Exit(2)
+		}
+
+		var buf bytes.Buffer
+		cmd := exec.Command("gh", "auth", "token")
+		cmd.Stdout = &buf
+		cmd.Stderr = os.Stderr
+		if err := cmd.Run(); err != nil {
+			fmt.Fprintln(os.Stderr, "Failed to run `gh auth token`:", err)
+			os.Exit(1)
+		}
+
+		githubToken = strings.TrimSpace(buf.String())
+	}
+
+	// The JSON files live next to the generator directory, wherever the command is run from.
+	_, thisFile, _, ok := runtime.Caller(0)
+	if !ok {
+		fmt.Fprintln(os.Stderr, "Failed to locate the generator source file")
+		os.Exit(1)
+	}
+
+	dir := filepath.Dir(thisFile)
+	if err := downloadFromDdgo(base64Decode(commonMetricsURL), filepath.Join(dir, "..", "common_metrics.json"), *branch, githubToken, getCommonMetricNames); err != nil {
+		fmt.Fprintln(os.Stderr, "Failed to download common metrics:", err)
+		os.Exit(1)
+	}
+
+	if err := downloadFromDdgo(base64Decode(goMetricsURL), filepath.Join(dir, "..", "golang_metrics.json"), *branch, githubToken, getGoMetricNames); err != nil {
+		fmt.Fprintln(os.Stderr, "Failed to download golang metrics:", err)
+		os.Exit(1)
+	}
+}
diff --git a/internal/newtelemetry/internal/knownmetrics/golang_metrics.json b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json
new file mode 100644
index 0000000000..c2c581bf85
--- /dev/null
+++ b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json
@@ -0,0 +1,7 @@
+[
+  {
+    "namespace": "",
+    "type": "gauge",
+    "name": "orchestrion.enabled"
+  }
+]
diff --git a/internal/newtelemetry/internal/knownmetrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go
new file mode 100644
index 0000000000..4fb1c6b079
--- /dev/null
+++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go
@@ -0,0 +1,63 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+//go:generate go run gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics/generator
+
+package knownmetrics
+
+import (
+	_ "embed"
+	"encoding/json"
+	"slices"
+
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport"
+)
+
+//go:embed common_metrics.json
+var commonMetricsJSON []byte
+
+//go:embed golang_metrics.json
+var golangMetricsJSON []byte
+
+// Declaration identifies a metric by namespace, type and name.
+// It is the element type of the embedded JSON metric lists.
+type Declaration struct {
+	Namespace transport.Namespace  `json:"namespace"`
+	Type      transport.MetricType `json:"type"`
+	Name      string               `json:"name"`
+}
+
+// The embedded lists are parsed once, at package initialization.
+var (
+	commonMetrics = parseMetricNames(commonMetricsJSON)
+	golangMetrics = parseMetricNames(golangMetricsJSON)
+)
+
+// parseMetricNames decodes a JSON list of declarations. A decoding error is logged, not returned.
+func parseMetricNames(bytes []byte) []Declaration {
+	var declarations []Declaration
+	err := json.Unmarshal(bytes, &declarations)
+	if err != nil {
+		log.Error("telemetry: failed to parse metric names: %v", err)
+	}
+	return declarations
+}
+
+// IsKnownMetric reports whether the given namespace, type and name match a declaration of either
+// the generated common_metrics.json file or the generated golang_metrics.json file.
+// If you added new metrics to the backend, you should rerun the generator.
+func IsKnownMetric(namespace transport.Namespace, typ transport.MetricType, name string) bool {
+	candidate := Declaration{Namespace: namespace, Type: typ, Name: name}
+	if slices.Contains(commonMetrics, candidate) {
+		return true
+	}
+	return slices.Contains(golangMetrics, candidate)
+}
+
+// IsCommonMetric returns true if the given metric name is a known common (cross-language) metric by the backend
+// This is linked to the generated common_metrics.json file. If you added new metrics to the backend, you should rerun the generator.
+func IsCommonMetric(namespace transport.Namespace, typ transport.MetricType, name string) bool {
+	return slices.Contains(commonMetrics, Declaration{
+		Namespace: namespace,
+		Type:      typ,
+		Name:      name,
+	})
+}
+
+// IsLanguageMetric reports whether the given type and name match a declaration of the generated
+// golang_metrics.json file. The namespace of the declaration looked up is left empty.
+// If you added new metrics to the backend, you should rerun the generator.
+func IsLanguageMetric(typ transport.MetricType, name string) bool {
+	return slices.Contains(golangMetrics, Declaration{
+		Type: typ,
+		Name: name,
+	})
+}
diff --git a/internal/newtelemetry/internal/mapper/app_closing.go b/internal/newtelemetry/internal/mapper/app_closing.go
new file mode 100644
index 0000000000..aeba9c7adc
--- /dev/null
+++ b/internal/newtelemetry/internal/mapper/app_closing.go
@@ -0,0 +1,23 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package mapper
+
+import (
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport"
+)
+
+// NewAppClosingMapper returns a new Mapper that appends an AppClosing payload to the given payloads and calls the underlying Mapper with it.
+func NewAppClosingMapper(next Mapper) Mapper {
+	enricher := appClosingEnricher{next: next}
+	return &enricher
+}
+
+// appClosingEnricher is the Mapper returned by NewAppClosingMapper.
+type appClosingEnricher struct {
+	next Mapper
+}
+
+// Transform appends an AppClosing payload to the given payloads and returns whatever the underlying Mapper makes of them.
+func (t *appClosingEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) {
+	withClosing := append(payloads, transport.AppClosing{})
+	return t.next.Transform(withClosing)
+}
diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go
new file mode 100644
index 0000000000..f281672a17
--- /dev/null
+++ b/internal/newtelemetry/internal/mapper/app_started.go
@@ -0,0 +1,48 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package mapper
+
+import (
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig"
+	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport"
+)
+
+// appStartedReducer is the Mapper returned by NewAppStartedMapper.
+type appStartedReducer struct {
+	next Mapper
+}
+
+// NewAppStartedMapper returns a new Mapper that adds an AppStarted payload to the beginning of all payloads
+// and passes it down to its underlying mapper.
+// The AppStarted payload ingest the [transport.AppClientConfigurationChange] and [transport.AppProductChange] payloads +func NewAppStartedMapper(next Mapper) Mapper { + return &appStartedReducer{next: next} +} + +func (t *appStartedReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + appStarted := transport.AppStarted{ + InstallSignature: transport.InstallSignature{ + InstallID: globalconfig.InstrumentationInstallID(), + InstallType: globalconfig.InstrumentationInstallType(), + InstallTime: globalconfig.InstrumentationInstallTime(), + }, + } + + payloadLefts := make([]transport.Payload, 0, len(payloads)) + for _, payload := range payloads { + switch payload := payload.(type) { + case transport.AppClientConfigurationChange: + appStarted.Configuration = payload.Configuration + case transport.AppProductChange: + appStarted.Products = payload.Products + default: + payloadLefts = append(payloadLefts, payload) + } + } + + // The app-started event should be the first event in the payload and not in an message-batch + payloads, mapper := t.next.Transform(payloadLefts) + return append([]transport.Payload{appStarted}, payloads...), mapper +} diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go new file mode 100644 index 0000000000..12f75a5fb8 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/default.go @@ -0,0 +1,99 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package mapper + +import ( + "time" + + "golang.org/x/time/rate" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// NewDefaultMapper returns a Mapper that transforms payloads into a MessageBatch and adds a heartbeat message. +// The heartbeat message is added every heartbeatInterval. 
+func NewDefaultMapper(heartbeatInterval, extendedHeartBeatInterval time.Duration) Mapper { + mapper := &defaultMapper{ + heartbeatEnricher: heartbeatEnricher{ + RL: rate.NewLimiter(rate.Every(heartbeatInterval), 1), + extendedHeartbeatRL: rate.NewLimiter(rate.Every(extendedHeartBeatInterval), 1), + }, + } + + // The rate limiter is initialized with a token, but we want the first heartbeat to be sent in one minute, so we consume the token + mapper.heartbeatEnricher.RL.Allow() + mapper.heartbeatEnricher.extendedHeartbeatRL.Allow() + return mapper +} + +type defaultMapper struct { + heartbeatEnricher + messageBatchReducer +} + +func (t *defaultMapper) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + payloads, _ = t.heartbeatEnricher.Transform(payloads) + payloads, _ = t.messageBatchReducer.Transform(payloads) + return payloads, t +} + +type messageBatchReducer struct{} + +func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + if len(payloads) <= 1 { + return payloads, t + } + + messages := make([]transport.Message, len(payloads)) + for i, payload := range payloads { + messages[i] = transport.Message{ + RequestType: payload.RequestType(), + Payload: payload, + } + } + + return []transport.Payload{transport.MessageBatch(messages)}, t +} + +type heartbeatEnricher struct { + RL *rate.Limiter + extendedHeartbeatRL *rate.Limiter + + extendedHeartbeat transport.AppExtendedHeartbeat + heartBeat transport.AppHeartbeat +} + +func (t *heartbeatEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + // Built the extended heartbeat using other payloads + // Composition described here: + // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-extended-heartbeat + for _, payload := range payloads { + switch payload := payload.(type) { + case transport.AppStarted: + // Should be sent only once anyway + 
t.extendedHeartbeat.Configuration = payload.Configuration + case transport.AppDependenciesLoaded: + if t.extendedHeartbeat.Dependencies == nil { + t.extendedHeartbeat.Dependencies = payload.Dependencies + } + case transport.AppIntegrationChange: + // The number of integrations should be small enough so we can just append to the list + t.extendedHeartbeat.Integrations = append(t.extendedHeartbeat.Integrations, payload.Integrations...) + } + } + + if !t.RL.Allow() { + // We don't send anything + return payloads, t + } + + if t.extendedHeartbeatRL.Allow() { + // We have an extended heartbeat to send + return append(payloads, t.extendedHeartbeat), t + } + + return append(payloads, t.heartBeat), t +} diff --git a/internal/newtelemetry/internal/mapper/mapper.go b/internal/newtelemetry/internal/mapper/mapper.go new file mode 100644 index 0000000000..84a376fee6 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/mapper.go @@ -0,0 +1,17 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package mapper + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// Mapper is an interface for transforming payloads to comply with different types of lifecycle events in the application. 
// Range is an inclusive interval [Min, Max] over any ordered type.
type Range[T cmp.Ordered] struct {
	Min T
	Max T
}

// IsOrdered reports whether the bounds are in order, i.e. Min <= Max.
func (r Range[T]) IsOrdered() bool {
	return r.Max >= r.Min
}

// Contains reports whether value lies within the range, bounds included.
func (r Range[T]) Contains(value T) bool {
	return r.Min <= value && value <= r.Max
}

// Clamp squeezes value into the range: values above Max become Max and values below Min become Min.
func (r Range[T]) Clamp(value T) T {
	capped := min(r.Max, value)
	return max(capped, r.Min)
}

// ReduceMax returns a copy of the range whose Max is value. Min is lowered to value when needed so
// that the returned range stays ordered.
func (r Range[T]) ReduceMax(value T) Range[T] {
	// r is a copy (value receiver), so it can be updated in place and returned.
	r.Min = min(r.Min, value)
	r.Max = value
	return r
}
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +// Recorder is a generic thread-safe type that records functions that could have taken place before object T was created. +// Once object T is created, the Recorder can replay all the recorded functions with object T as an argument. +type Recorder[T any] struct { + queue *RingQueue[func(T)] +} + +// TODO: tweak these values once we get telemetry data from the telemetry client +var queueCap = Range[int]{ + Min: 16, // Initial queue capacity + Max: 512, // Maximum queue capacity +} + +// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions before overflowing. +func NewRecorder[T any]() Recorder[T] { + return Recorder[T]{ + queue: NewRingQueue[func(T)](queueCap), + } +} + +// Record takes a function and records it in the [Recorder]'s queue. If the queue is full, it returns false. +// Once [Recorder.Replay] is called, all recorded functions will be replayed with object T as an argument in order of recording. +func (r Recorder[T]) Record(f func(T)) bool { + if r.queue == nil { + return true + } + return r.queue.Enqueue(f) +} + +// Replay uses T as an argument to replay all recorded functions in order of recording. +func (r Recorder[T]) Replay(t T) { + if r.queue == nil { + return + } + for { + f := r.queue.Dequeue() + if f == nil { + break + } + f(t) + } +} + +// Clear clears the Recorder's queue. +func (r Recorder[T]) Clear() { + r.queue.Clear() +} diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go new file mode 100644 index 0000000000..aeccfb3e06 --- /dev/null +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -0,0 +1,191 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package internal + +import ( + "sync" +) + +// RingQueue is a thread-safe ring buffer can be used to store a fixed number of elements and overwrite old values when full. +type RingQueue[T any] struct { + buffer []T + head, tail, count int + // mu is the lock for the buffer, head and tail. + mu sync.Mutex + // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. + pool *SyncPool[[]T] + // BufferSizes is the range of buffer sizes that the ring queue can have. + BufferSizes Range[int] +} + +// NewRingQueue creates a new RingQueue with a minimum size and a maximum size. +func NewRingQueue[T any](rang Range[int]) *RingQueue[T] { + return &RingQueue[T]{ + buffer: make([]T, rang.Min), + pool: NewSyncPool[[]T](func() []T { return make([]T, rang.Min) }), + BufferSizes: rang, + } +} + +// NewRingQueueWithPool creates a new RingQueue with a minimum size, a maximum size and a pool. Make sure the pool is properly initialized with the right type +func NewRingQueueWithPool[T any](rang Range[int], pool *SyncPool[[]T]) *RingQueue[T] { + return &RingQueue[T]{ + buffer: make([]T, rang.Min), + pool: pool, + BufferSizes: rang, + } +} + +// Length returns the number of elements currently stored in the queue. 
+func (rq *RingQueue[T]) Length() int { + rq.mu.Lock() + defer rq.mu.Unlock() + + return rq.count +} + +func (rq *RingQueue[T]) resizeLocked() { + newBuf := make([]T, rq.BufferSizes.Clamp(rq.count*2)) + defer rq.releaseBuffer(rq.buffer) + + if rq.tail > rq.head { + copy(newBuf, rq.buffer[rq.head:rq.tail]) + } else { + n := copy(newBuf, rq.buffer[rq.head:]) + copy(newBuf[n:], rq.buffer[:rq.tail]) + } + + rq.head = 0 + rq.tail = rq.count + rq.buffer = newBuf +} + +func (rq *RingQueue[T]) enqueueLocked(elem T) bool { + spaceLeft := true + if rq.count == len(rq.buffer) { + if len(rq.buffer) == rq.BufferSizes.Max { + spaceLeft = false + // bitwise modulus + rq.head = (rq.head + 1) % len(rq.buffer) + rq.count-- + } else { + rq.resizeLocked() + } + } + + rq.buffer[rq.tail] = elem + rq.tail = (rq.tail + 1) % len(rq.buffer) + rq.count++ + return spaceLeft +} + +// ReversePeek returns the last element that was enqueued without removing it. +func (rq *RingQueue[T]) ReversePeek() T { + rq.mu.Lock() + defer rq.mu.Unlock() + if rq.count == 0 { + var zero T + return zero + } + return rq.buffer[(rq.tail-1+len(rq.buffer))%len(rq.buffer)] +} + +// Enqueue adds one or multiple values to the buffer. Returns false if at least one item had to be pulled out from the queue to make space for new ones +func (rq *RingQueue[T]) Enqueue(vals ...T) bool { + rq.mu.Lock() + defer rq.mu.Unlock() + if len(vals) == 0 { + return rq.IsFull() + } + + spaceLeft := true + for _, val := range vals { + spaceLeft = rq.enqueueLocked(val) + } + return spaceLeft +} + +// Dequeue removes a value from the buffer. +func (rq *RingQueue[T]) Dequeue() T { + rq.mu.Lock() + defer rq.mu.Unlock() + + if rq.count == 0 { + var zero T + return zero + } + + ret := rq.buffer[rq.head] + // bitwise modulus + rq.head = (rq.head + 1) % len(rq.buffer) + rq.count-- + return ret +} + +// getBuffer returns the current buffer and resets it. 
+func (rq *RingQueue[T]) getBuffer() []T { + rq.mu.Lock() + defer rq.mu.Unlock() + return rq.getBufferLocked() +} + +func (rq *RingQueue[T]) getBufferLocked() []T { + prevBuf := rq.buffer + rq.buffer = rq.pool.Get() + rq.head, rq.tail, rq.count = 0, 0, 0 + return prevBuf +} + +// Flush returns a copy of the buffer and resets it. +func (rq *RingQueue[T]) Flush() []T { + rq.mu.Lock() + head, count := rq.head, rq.count + buf := rq.getBufferLocked() + rq.mu.Unlock() + + defer rq.releaseBuffer(buf) + + copyBuf := make([]T, count) + for i := 0; i < count; i++ { + copyBuf[i] = buf[(head+i)%len(buf)] + } + + return copyBuf +} + +// releaseBuffer returns the buffer to the pool. +func (rq *RingQueue[T]) releaseBuffer(buf []T) { + var zero T + buf = buf[:cap(buf)] // Make sure nobody reduced the length of the buffer + for i := range buf { + buf[i] = zero + } + rq.pool.Put(buf) +} + +// IsEmpty returns true if the buffer is empty. +func (rq *RingQueue[T]) IsEmpty() bool { + return rq.Length() == 0 +} + +// IsFull returns true if the buffer is full and cannot accept more elements. +func (rq *RingQueue[T]) IsFull() bool { + rq.mu.Lock() + defer rq.mu.Unlock() + + return len(rq.buffer) == rq.count && len(rq.buffer) == rq.BufferSizes.Max +} + +// Clear removes all elements from the buffer. +func (rq *RingQueue[T]) Clear() { + rq.mu.Lock() + defer rq.mu.Unlock() + rq.head, rq.tail, rq.count = 0, 0, 0 + var zero T + for i := range rq.buffer { + rq.buffer[i] = zero + } +} diff --git a/internal/newtelemetry/internal/ringbuffer_test.go b/internal/newtelemetry/internal/ringbuffer_test.go new file mode 100644 index 0000000000..9b98eebc10 --- /dev/null +++ b/internal/newtelemetry/internal/ringbuffer_test.go @@ -0,0 +1,165 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEnqueueSingleElement(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + success := queue.Enqueue(1) + assert.True(t, success) + assert.Equal(t, 1, queue.ReversePeek()) +} + +func TestEnqueueMultipleElements(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + success := queue.Enqueue(1, 2, 3) + assert.True(t, success) + assert.Equal(t, 3, queue.ReversePeek()) +} + +func TestEnqueueBeyondCapacity(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + success := queue.Enqueue(1, 2, 3, 4, 5, 6) + assert.False(t, success) + assert.Equal(t, 6, queue.ReversePeek()) +} + +func TestDequeueSingleElement(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1) + val := queue.Dequeue() + assert.Equal(t, 1, val) +} + +func TestDequeueMultipleElements(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1, 2, 3) + val1 := queue.Dequeue() + val2 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) +} + +func TestDequeueFromEmptyQueueAfterBeingFull(t *testing.T) { + queue := NewRingQueue[int](Range[int]{1, 1}) + assert.True(t, queue.Enqueue(1)) + assert.False(t, queue.Enqueue(2)) + assert.Equal(t, []int{2}, queue.Flush()) + + // Should return the zero value for int + assert.Equal(t, 0, queue.Dequeue()) +} + +func TestDequeueFromEmptyQueue(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + val := queue.Dequeue() + assert.Equal(t, 0, val) // Assuming zero value for int +} + +func TestGetBufferAndReset(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1, 2, 3) + buffer := queue.getBuffer() + assert.Equal(t, []int{1, 2, 3}, buffer) + assert.True(t, queue.IsEmpty()) +} + +func TestFlushAndReset(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1, 2, 3) + buffer := queue.Flush() + assert.Equal(t, []int{1, 2, 3}, 
buffer) + assert.True(t, queue.IsEmpty()) +} + +func TestIsEmptyWhenQueueIsEmpty(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + assert.True(t, queue.IsEmpty()) +} + +func TestIsEmptyWhenQueueIsNotEmpty(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1) + assert.False(t, queue.IsEmpty()) +} + +func TestIsFullWhenQueueIsFull(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1, 2, 3, 4, 5) + assert.True(t, queue.IsFull()) +} + +func TestIsFullWhenQueueIsNotFull(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 5}) + queue.Enqueue(1, 2, 3) + assert.False(t, queue.IsFull()) +} + +func TestEnqueueToFullQueue(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 3}) + success := queue.Enqueue(1, 2, 3) + assert.True(t, success) + success = queue.Enqueue(4) + assert.False(t, success) + assert.Equal(t, 4, queue.ReversePeek()) +} + +func TestDequeueFromFullQueue(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 3}) + queue.Enqueue(1, 2, 3) + val := queue.Dequeue() + assert.Equal(t, 1, val) + assert.False(t, queue.IsFull()) +} + +func TestEnqueueAfterDequeue(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 3}) + queue.Enqueue(1, 2, 3) + queue.Dequeue() + success := queue.Enqueue(4) + assert.True(t, success) + assert.Equal(t, 4, queue.ReversePeek()) +} + +func TestDequeueAllElements(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 3}) + queue.Enqueue(1, 2, 3) + val1 := queue.Dequeue() + val2 := queue.Dequeue() + val3 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) + assert.Equal(t, 3, val3) + assert.True(t, queue.IsEmpty()) +} + +func TestEnqueueAndDequeueInterleaved(t *testing.T) { + queue := NewRingQueue[int](Range[int]{3, 3}) + queue.Enqueue(1) + val1 := queue.Dequeue() + queue.Enqueue(2) + val2 := queue.Dequeue() + queue.Enqueue(3, 4) + val3 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) + assert.Equal(t, 
// SyncMap is a typed wrapper around [sync.Map]: keys are of type K and values of type V.
// The typed methods assert the stored types, so entries must only be added with K keys and V values.
type SyncMap[K comparable, V any] struct {
	sync.Map
}

// Load returns the value stored for key, or the zero value of V and false when there is none.
func (m *SyncMap[K, V]) Load(key K) (value V, ok bool) {
	raw, found := m.Map.Load(key)
	if found {
		value = raw.(V)
	}
	return value, found
}

// Range calls f for each entry of the map until f returns false.
func (m *SyncMap[K, V]) Range(f func(key K, value V) bool) {
	typed := func(k, v any) bool {
		return f(k.(K), v.(V))
	}
	m.Map.Range(typed)
}

// Len counts the entries by walking the whole map.
func (m *SyncMap[K, V]) Len() int {
	var n int
	m.Map.Range(func(_, _ any) bool {
		n++
		return true
	})
	return n
}

// LoadOrStore returns the existing value for key if present; otherwise it stores and returns value.
// loaded is true when the value was already present.
func (m *SyncMap[K, V]) LoadOrStore(key K, value V) (actual V, loaded bool) {
	raw, found := m.Map.LoadOrStore(key, value)
	return raw.(V), found
}

// LoadAndDelete deletes the entry for key and returns its previous value, or the zero value of V and
// false when there was none.
func (m *SyncMap[K, V]) LoadAndDelete(key K) (value V, loaded bool) {
	raw, found := m.Map.LoadAndDelete(key)
	if found {
		return raw.(V), true
	}
	var zero V
	return zero, false
}
// SyncPool is a wrapper around [sync.Pool] that provides type safety.
type SyncPool[T any] struct {
	pool *sync.Pool
}

// NewSyncPool creates a new pool that calls newT whenever Get finds nothing to reuse.
func NewSyncPool[T any](newT func() T) *SyncPool[T] {
	inner := new(sync.Pool)
	inner.New = func() any {
		return newT()
	}
	return &SyncPool[T]{pool: inner}
}

// Get returns a value from the pool, typed as T.
func (sp *SyncPool[T]) Get() T {
	item := sp.pool.Get()
	return item.(T)
}

// Put hands v back to the pool.
func (sp *SyncPool[T]) Put(v T) {
	sp.pool.Put(v)
}
+ +package internal + +import ( + "sync" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" +) + +type TickFunc func() + +type Ticker struct { + ticker *time.Ticker + + tickSpeedMu sync.Mutex + tickSpeed time.Duration + + interval Range[time.Duration] + + tickFunc TickFunc + + stopChan chan struct{} + stopped bool +} + +func NewTicker(tickFunc TickFunc, interval Range[time.Duration]) *Ticker { + ticker := &Ticker{ + ticker: time.NewTicker(interval.Max), + tickSpeed: interval.Max, + interval: interval, + tickFunc: tickFunc, + stopChan: make(chan struct{}), + } + + go func() { + for { + select { + case <-ticker.ticker.C: + tickFunc() + case <-ticker.stopChan: + break + } + } + }() + + return ticker +} + +func (t *Ticker) CanIncreaseSpeed() { + t.tickSpeedMu.Lock() + defer t.tickSpeedMu.Unlock() + + oldTickSpeed := t.tickSpeed + t.tickSpeed = t.interval.Clamp(t.tickSpeed / 2) + + if oldTickSpeed == t.tickSpeed { + return + } + + log.Debug("telemetry: increasing flush speed to an interval of %s", t.tickSpeed) + t.ticker.Reset(t.tickSpeed) +} + +func (t *Ticker) CanDecreaseSpeed() { + t.tickSpeedMu.Lock() + defer t.tickSpeedMu.Unlock() + + oldTickSpeed := t.tickSpeed + t.tickSpeed = t.interval.Clamp(t.tickSpeed * 2) + + if oldTickSpeed == t.tickSpeed { + return + } + + log.Debug("telemetry: decreasing flush speed to an interval of %s", t.tickSpeed) + t.ticker.Reset(t.tickSpeed) +} + +func (t *Ticker) Stop() { + if t.stopped { + return + } + t.ticker.Stop() + t.stopChan <- struct{}{} + close(t.stopChan) + t.stopped = true +} diff --git a/internal/newtelemetry/internal/tracerconfig.go b/internal/newtelemetry/internal/tracerconfig.go new file mode 100644 index 0000000000..439ff430f3 --- /dev/null +++ b/internal/newtelemetry/internal/tracerconfig.go @@ -0,0 +1,16 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
// AppClosing is the payload sent under the RequestTypeAppClosing request type. It carries no data.
type AppClosing struct{}

// RequestType returns RequestTypeAppClosing.
func (AppClosing) RequestType() RequestType {
	return RequestTypeAppClosing
}
// AppDependenciesLoaded is the payload sent under the RequestTypeAppDependenciesLoaded request type.
// It lists the dependencies of the application.
type AppDependenciesLoaded struct {
	Dependencies []Dependency `json:"dependencies"`
}

// RequestType returns RequestTypeAppDependenciesLoaded.
func (AppDependenciesLoaded) RequestType() RequestType {
	return RequestTypeAppDependenciesLoaded
}

// Dependency is a Go module on which the application depends. This information
// can be accessed at run-time through the runtime/debug.ReadBuildInfo API.
type Dependency struct {
	Name    string `json:"name"`
	Version string `json:"version,omitempty"`
}
// AppExtendedHeartbeat is the payload sent under the RequestTypeAppExtendedHeartBeat request type.
// It carries the configuration, dependencies and integrations of the application; each list is left
// out of the JSON output when empty.
type AppExtendedHeartbeat struct {
	Configuration []ConfKeyValue `json:"configuration,omitempty"`
	Dependencies  []Dependency   `json:"dependencies,omitempty"`
	Integrations  []Integration  `json:"integrations,omitempty"`
}

// RequestType returns RequestTypeAppExtendedHeartBeat.
func (AppExtendedHeartbeat) RequestType() RequestType {
	return RequestTypeAppExtendedHeartBeat
}
// AppProductChange is the payload sent under the RequestTypeAppProductChange request type.
// It reports the state of the products of the application.
type AppProductChange struct {
	Products Products `json:"products"`
}

// Products maps a product, identified by its Namespace, to its reported state.
type Products map[Namespace]Product

// RequestType returns RequestTypeAppProductChange.
func (AppProductChange) RequestType() RequestType {
	return RequestTypeAppProductChange
}

// Product is the state of a single product: its version, whether it is enabled and an error.
type Product struct {
	Version string `json:"version,omitempty"`
	Enabled bool   `json:"enabled"`
	// NOTE(review): if Error is a struct type, omitempty has no effect on it with encoding/json and the
	// field is always emitted — confirm against the declaration of Error.
	Error Error `json:"error,omitempty"`
}
// AppStarted is the payload sent under the RequestTypeAppStarted request type. It carries the products
// and configuration of the application along with its install signature.
type AppStarted struct {
	Products      Products       `json:"products,omitempty"`
	Configuration []ConfKeyValue `json:"configuration,omitempty"`
	// NOTE(review): if Error is a struct type, omitempty has no effect on it — confirm against the
	// declaration of Error.
	Error Error `json:"error,omitempty"`
	// InstallSignature is a struct, which encoding/json never treats as empty: despite omitempty the
	// "install_signature" key is always present (as an empty object when no field is set).
	InstallSignature  InstallSignature    `json:"install_signature,omitempty"`
	AdditionalPayload []AdditionalPayload `json:"additional_payload,omitempty"`
}

// RequestType returns RequestTypeAppStarted.
func (AppStarted) RequestType() RequestType {
	return RequestTypeAppStarted
}

// InstallSignature is a structure to send the install signature with the
// AppStarted payload. Empty fields are left out of the JSON output.
type InstallSignature struct {
	InstallID   string `json:"install_id,omitempty"`
	InstallType string `json:"install_type,omitempty"`
	InstallTime string `json:"install_time,omitempty"`
}

// AdditionalPayload is a generic structure to send additional data with the
// AppStarted payload: a payload together with the request type naming it.
type AdditionalPayload struct {
	Name  RequestType `json:"name"`
	Value Payload     `json:"value"`
}
+ +package transport + +import ( + "encoding/json" + "fmt" +) + +// Application is identifying information about the app itself +type Application struct { + ServiceName string `json:"service_name"` + Env string `json:"env"` + ServiceVersion string `json:"service_version"` + TracerVersion string `json:"tracer_version"` + LanguageName string `json:"language_name"` + LanguageVersion string `json:"language_version"` +} + +// Host is identifying information about the host on which the app +// is running +type Host struct { + Hostname string `json:"hostname"` + OS string `json:"os"` + OSVersion string `json:"os_version,omitempty"` + Architecture string `json:"architecture"` + KernelName string `json:"kernel_name"` + KernelRelease string `json:"kernel_release"` + KernelVersion string `json:"kernel_version"` +} + +// Body is the common high-level structure encapsulating a telemetry request body +// Described here: https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Request%20Bodies/telemetry_body.md +type Body struct { + APIVersion string `json:"api_version"` + RequestType RequestType `json:"request_type"` + TracerTime int64 `json:"tracer_time"` + RuntimeID string `json:"runtime_id"` + SeqID int64 `json:"seq_id"` + Debug bool `json:"debug,omitempty"` + Payload Payload `json:"payload"` + Application Application `json:"application"` + Host Host `json:"host"` +} + +// UnmarshalJSON is used to test the telemetry client end to end +func (b *Body) UnmarshalJSON(bytes []byte) error { + var anyMap map[string]json.RawMessage + var err error + if err = json.Unmarshal(bytes, &anyMap); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["api_version"], &b.APIVersion); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["request_type"], &b.RequestType); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["tracer_time"], &b.TracerTime); err != nil { + return err + } + + if 
err = json.Unmarshal(anyMap["runtime_id"], &b.RuntimeID); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["seq_id"], &b.SeqID); err != nil { + return err + } + + if _, ok := anyMap["debug"]; ok { + if err = json.Unmarshal(anyMap["debug"], &b.Debug); err != nil { + return err + } + } + + if err = json.Unmarshal(anyMap["application"], &b.Application); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["host"], &b.Host); err != nil { + return err + } + + if b.RequestType == RequestTypeMessageBatch { + var messageBatch []struct { + RequestType RequestType `json:"request_type"` + Payload json.RawMessage `json:"payload"` + } + + if err = json.Unmarshal(anyMap["payload"], &messageBatch); err != nil { + return err + } + + batch := make([]Message, len(messageBatch)) + for i, message := range messageBatch { + payload, err := unmarshalPayload(message.Payload, message.RequestType) + if err != nil { + return err + } + batch[i] = Message{RequestType: message.RequestType, Payload: payload} + } + b.Payload = MessageBatch(batch) + return nil + } + + b.Payload, err = unmarshalPayload(anyMap["payload"], b.RequestType) + return err +} + +func unmarshalPayload(bytes json.RawMessage, requestType RequestType) (Payload, error) { + var payload Payload + switch requestType { + case RequestTypeAppClientConfigurationChange: + payload = new(AppClientConfigurationChange) + case RequestTypeAppProductChange: + payload = new(AppProductChange) + case RequestTypeAppIntegrationsChange: + payload = new(AppIntegrationChange) + case RequestTypeAppHeartbeat: + payload = new(AppHeartbeat) + case RequestTypeAppStarted: + payload = new(AppStarted) + case RequestTypeAppClosing: + payload = new(AppClosing) + case RequestTypeAppExtendedHeartBeat: + payload = new(AppExtendedHeartbeat) + case RequestTypeAppDependenciesLoaded: + payload = new(AppDependenciesLoaded) + case RequestTypeDistributions: + payload = new(Distributions) + case RequestTypeGenerateMetrics: + payload = 
new(GenerateMetrics) + case RequestTypeLogs: + payload = new(Logs) + } + + if err := json.Unmarshal(bytes, payload); err != nil { + return nil, fmt.Errorf("failed to unmarshal payload: %v", err) + } + + return payload, nil +} diff --git a/internal/newtelemetry/internal/transport/conf_key_value.go b/internal/newtelemetry/internal/transport/conf_key_value.go new file mode 100644 index 0000000000..70778c7dbd --- /dev/null +++ b/internal/newtelemetry/internal/transport/conf_key_value.go @@ -0,0 +1,17 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// ConfKeyValue is a library-specific configuration value +type ConfKeyValue struct { + Name string `json:"name"` + Value any `json:"value"` // can be any type of integer, float, string, or boolean + Origin Origin `json:"origin"` + Error Error `json:"error,omitempty"` + + // SeqID is used to track the total number of configuration key value pairs applied across the tracer + SeqID uint64 `json:"seq_id,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/distributions.go b/internal/newtelemetry/internal/transport/distributions.go new file mode 100644 index 0000000000..1dda074488 --- /dev/null +++ b/internal/newtelemetry/internal/transport/distributions.go @@ -0,0 +1,36 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// All objects in this file are used to define the payload of the requests sent +// to the telemetry API. 
+// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas + +type Distributions struct { + Namespace Namespace `json:"namespace"` + Series []DistributionSeries `json:"series"` + SkipAllowlist bool `json:"skip_allowlist,omitempty"` +} + +func (Distributions) RequestType() RequestType { + return RequestTypeDistributions +} + +// DistributionSeries is a sequence of observations for a distribution metric. +// Unlike `MetricData`, DistributionSeries does not store timestamps in `Points` +type DistributionSeries struct { + Metric string `json:"metric"` + Points []float64 `json:"points"` + Tags []string `json:"tags,omitempty"` + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common,omitempty"` + Namespace Namespace `json:"namespace,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/error.go b/internal/newtelemetry/internal/transport/error.go new file mode 100644 index 0000000000..b0de67be42 --- /dev/null +++ b/internal/newtelemetry/internal/transport/error.go @@ -0,0 +1,12 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package transport + +// Error stores error information about various tracer events +type Error struct { + Code int `json:"code"` + Message string `json:"message"` +} diff --git a/internal/newtelemetry/internal/transport/generate-metrics.go b/internal/newtelemetry/internal/transport/generate-metrics.go new file mode 100644 index 0000000000..220f93a920 --- /dev/null +++ b/internal/newtelemetry/internal/transport/generate-metrics.go @@ -0,0 +1,53 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// All objects in this file are used to define the payload of the requests sent +// to the telemetry API. +// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas + +type GenerateMetrics struct { + Namespace Namespace `json:"namespace,omitempty"` + Series []MetricData `json:"series"` + SkipAllowlist bool `json:"skip_allowlist,omitempty"` +} + +func (GenerateMetrics) RequestType() RequestType { + return RequestTypeGenerateMetrics +} + +// MetricType is the type of metric being sent, either count, gauge, or rate +// distribution is another payload altogether +type MetricType string + +const ( + RateMetric MetricType = "rate" + CountMetric MetricType = "count" + GaugeMetric MetricType = "gauge" + DistMetric MetricType = "distribution" +) + +// MetricData is a sequence of observations for a single named metric. 
+type MetricData struct { + Metric string `json:"metric"` + // Points stores pairs of timestamps and values + // This first value should be an int64 timestamp and the second should be a float64 value + Points [][2]any `json:"points"` + // Interval is required only for gauge and rate metrics + Interval int64 `json:"interval,omitempty"` + // Type cannot be of type distribution because there is a different payload for it + Type MetricType `json:"type,omitempty"` + Tags []string `json:"tags,omitempty"` + + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common"` + Namespace Namespace `json:"namespace,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/logs.go b/internal/newtelemetry/internal/transport/logs.go new file mode 100644 index 0000000000..c0cebe2849 --- /dev/null +++ b/internal/newtelemetry/internal/transport/logs.go @@ -0,0 +1,31 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +type Logs struct { + Logs []LogMessage `json:"logs,omitempty"` +} + +func (Logs) RequestType() RequestType { + return RequestTypeLogs +} + +type LogLevel string + +const ( + LogLevelDebug LogLevel = "DEBUG" + LogLevelWarn LogLevel = "WARN" + LogLevelError LogLevel = "ERROR" +) + +type LogMessage struct { + Message string `json:"message"` + Level LogLevel `json:"level"` + Count uint32 `json:"count,omitempty"` + Tags string `json:"tags,omitempty"` // comma separated list of tags, e.g. 
"tag1:1,tag2:toto" + StackTrace string `json:"stack_trace,omitempty"` + TracerTime int64 `json:"tracer_time,omitempty"` // Unix timestamp in seconds +} diff --git a/internal/newtelemetry/internal/transport/message_batch.go b/internal/newtelemetry/internal/transport/message_batch.go new file mode 100644 index 0000000000..d1f9f964fe --- /dev/null +++ b/internal/newtelemetry/internal/transport/message_batch.go @@ -0,0 +1,17 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +type MessageBatch []Message + +func (MessageBatch) RequestType() RequestType { + return RequestTypeMessageBatch +} + +type Message struct { + RequestType RequestType `json:"request_type"` + Payload Payload `json:"payload"` +} diff --git a/internal/newtelemetry/internal/transport/namespace.go b/internal/newtelemetry/internal/transport/namespace.go new file mode 100644 index 0000000000..02d94aca19 --- /dev/null +++ b/internal/newtelemetry/internal/transport/namespace.go @@ -0,0 +1,20 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
type (
	// Namespace is the name of the product a telemetry payload belongs to.
	Namespace string

	// Origin describes the source of a configuration change.
	Origin string
)

// The namespaces declared by this package.
const (
	NamespaceGeneral      = Namespace("general")
	NamespaceTracers      = Namespace("tracers")
	NamespaceProfilers    = Namespace("profilers")
	NamespaceAppSec       = Namespace("appsec")
	NamespaceIAST         = Namespace("iast")
	NamespaceTelemetry    = Namespace("telemetry")
	NamespaceCIVisibility = Namespace("civisibility")
	NamespaceMLOps        = Namespace("mlops")
	NamespaceRUM          = Namespace("rum")
)

// The configuration origins declared by this package.
const (
	OriginDefault      = Origin("default")
	OriginCode         = Origin("code")
	OriginDDConfig     = Origin("dd_config")
	OriginEnvVar       = Origin("env_var")
	OriginRemoteConfig = Origin("remote_config")
)
type (
	// Payload is the interface implemented by all telemetry top-level structures, AKA payloads.
	// All structs are a strict representation of what is described in Instrumentation Telemetry v2 documentation schemas:
	// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas
	Payload interface {
		RequestType() RequestType
	}

	// RequestType determines how the Payload of a request should be handled.
	RequestType string
)

const (
	// RequestTypeAppStarted is the first message sent by the telemetry
	// client, containing the configuration loaded at startup.
	RequestTypeAppStarted = RequestType("app-started")

	// RequestTypeAppHeartbeat is sent periodically by the client to indicate
	// that the app is still running.
	RequestTypeAppHeartbeat = RequestType("app-heartbeat")

	// RequestTypeGenerateMetrics contains count, gauge, or rate metrics
	// accumulated by the client, and is sent periodically along with the
	// heartbeat.
	RequestTypeGenerateMetrics = RequestType("generate-metrics")

	// RequestTypeDistributions carries the distribution type metrics
	// accumulated by the client, and is sent periodically along with the
	// heartbeat.
	RequestTypeDistributions = RequestType("distributions")

	// RequestTypeAppClosing is sent when the telemetry client is stopped.
	RequestTypeAppClosing = RequestType("app-closing")

	// RequestTypeAppDependenciesLoaded is sent if
	// DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED is enabled. Sent when Start
	// is called for the telemetry client.
	RequestTypeAppDependenciesLoaded = RequestType("app-dependencies-loaded")

	// RequestTypeAppClientConfigurationChange is sent if there are changes
	// to the client library configuration.
	RequestTypeAppClientConfigurationChange = RequestType("app-client-configuration-change")

	// RequestTypeAppProductChange is sent when products are enabled/disabled.
	RequestTypeAppProductChange = RequestType("app-product-change")

	// RequestTypeAppIntegrationsChange is sent when the telemetry client
	// starts, with info on which integrations are used.
	RequestTypeAppIntegrationsChange = RequestType("app-integrations-change")

	// RequestTypeMessageBatch is a wrapper over a list of payloads.
	RequestTypeMessageBatch = RequestType("message-batch")

	// RequestTypeAppExtendedHeartBeat is used as a failsafe in case of a
	// catastrophic data failure: its data is used to reconstruct application
	// records in the backend database.
	RequestTypeAppExtendedHeartBeat = RequestType("app-extended-heartbeat")

	// RequestTypeLogs is used to send logs to the backend.
	RequestTypeLogs = RequestType("logs")
)
+ +package internal + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "runtime" + "sync" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + "gopkg.in/DataDog/dd-trace-go.v1/internal/version" +) + +// We copy the transport to avoid using the default one, as it might be +// augmented with tracing and we don't want these calls to be recorded. +// See https://golang.org/pkg/net/http/#DefaultTransport . +// +//orchestrion:ignore +var defaultHTTPClient = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + }, + Timeout: 5 * time.Second, +} + +func newBody(config TracerConfig, debugMode bool) *transport.Body { + osHostname, err := os.Hostname() + if err != nil { + osHostname = hostname.Get() + } + + if osHostname == "" { + osHostname = "unknown" // hostname field is not allowed to be empty + } + + return &transport.Body{ + APIVersion: "v2", + RuntimeID: globalconfig.RuntimeID(), + Debug: debugMode, + Application: transport.Application{ + ServiceName: config.Service, + Env: config.Env, + ServiceVersion: config.Version, + TracerVersion: version.Tag, + LanguageName: "go", + LanguageVersion: runtime.Version(), + }, + Host: transport.Host{ + Hostname: osHostname, + OS: osinfo.OSName(), + OSVersion: osinfo.OSVersion(), + Architecture: osinfo.Architecture(), + KernelName: osinfo.KernelName(), + KernelRelease: osinfo.KernelRelease(), + 
KernelVersion: osinfo.KernelVersion(), + }, + } +} + +// Writer is an interface that allows to send telemetry data to any endpoint that implements the instrumentation telemetry v2 API. +// The telemetry data is sent as a JSON payload as described in the API documentation. +type Writer interface { + // Flush does a synchronous call to the telemetry endpoint with the given payload. Thread-safe. + // It returns a non-empty [EndpointRequestResult] slice and a nil error if the payload was sent successfully. + // Otherwise, the error is a call to [errors.Join] on all errors that occurred. + Flush(transport.Payload) ([]EndpointRequestResult, error) +} + +// EndpointRequestResult is returned by the Flush method of the Writer interface. +type EndpointRequestResult struct { + // Error is the error that occurred when sending the payload to the endpoint. This is nil if the payload was sent successfully. + Error error + // PayloadByteSize is the number of bytes that were sent to the endpoint, zero if the payload was not sent. 
+ PayloadByteSize int + // CallDuration is the duration of the call to the endpoint if the call was successful + CallDuration time.Duration + // StatusCode is the status code of the response from the endpoint even if the call failed but only with an actual HTTP error + StatusCode int +} + +type writer struct { + mu sync.Mutex + body *transport.Body + httpClient *http.Client + endpoints []*http.Request +} + +type WriterConfig struct { + // TracerConfig is the configuration the tracer sent when the telemetry client was created (required) + TracerConfig + // Endpoints is a list of requests that will be used alongside the body of the telemetry data to create the requests to the telemetry endpoint (required to not be empty) + // The writer will try each endpoint in order until it gets a 2XX HTTP response from the server + Endpoints []*http.Request + // HTTPClient is the http client that will be used to send the telemetry data (defaults to a copy of [http.DefaultClient]) + HTTPClient *http.Client + // Debug is a flag that indicates whether the telemetry client is in debug mode (defaults to false) + Debug bool +} + +func NewWriter(config WriterConfig) (Writer, error) { + if len(config.Endpoints) == 0 { + return nil, fmt.Errorf("telemetry/writer: no endpoints provided") + } + + if config.HTTPClient == nil { + config.HTTPClient = defaultHTTPClient + } + + // Don't allow the client to have a timeout higher than 5 seconds + // This is to avoid blocking the client for too long in case of network issues + if config.HTTPClient.Timeout > 5*time.Second { + copyClient := *config.HTTPClient + config.HTTPClient = ©Client + config.HTTPClient.Timeout = 5 * time.Second + log.Debug("telemetry/writer: client timeout was higher than 5 seconds, clamping it to 5 seconds") + } + + body := newBody(config.TracerConfig, config.Debug) + endpoints := make([]*http.Request, len(config.Endpoints)) + for i, endpoint := range config.Endpoints { + endpoints[i] = preBakeRequest(body, endpoint) + } + + 
return &writer{ + body: body, + httpClient: config.HTTPClient, + endpoints: endpoints, + }, nil +} + +// preBakeRequest adds all the *static* headers that we already know at the time of the creation of the writer. +// This is useful to avoid querying too many things at the time of the request. +// Headers necessary are described here: +// https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#required-http-headers +func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request { + clonedEndpoint := endpoint.Clone(context.Background()) + if clonedEndpoint.Header == nil { + clonedEndpoint.Header = make(http.Header, 11) + } + + for key, val := range map[string]string{ + "Content-Type": "application/json", + "DD-Telemetry-API-Version": body.APIVersion, + "DD-Client-Library-Language": body.Application.LanguageName, + "DD-Client-Library-Version": body.Application.TracerVersion, + "DD-Agent-Env": body.Application.Env, + "DD-Agent-Hostname": body.Host.Hostname, + "DD-Agent-Install-Id": globalconfig.InstrumentationInstallID(), + "DD-Agent-Install-Type": globalconfig.InstrumentationInstallType(), + "DD-Agent-Install-Time": globalconfig.InstrumentationInstallTime(), + "Datadog-Container-ID": internal.ContainerID(), + "Datadog-Entity-ID": internal.EntityID(), + // TODO: add support for Cloud provider/resource-type/resource-id headers in another PR and package + // Described here: https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#setting-the-serverless-telemetry-headers + } { + if val == "" { + continue + } + clonedEndpoint.Header.Add(key, val) + } + + if body.Debug { + clonedEndpoint.Header.Add("DD-Telemetry-Debug-Enabled", "true") + } + + return clonedEndpoint +} + +// newRequest creates a new http.Request with the given payload and the necessary headers. 
+func (w *writer) newRequest(endpoint *http.Request, payload transport.Payload) *http.Request { + w.body.SeqID++ + w.body.TracerTime = time.Now().Unix() + w.body.RequestType = payload.RequestType() + w.body.Payload = payload + + request := endpoint.Clone(context.Background()) + request.Header.Set("DD-Telemetry-Request-Type", string(payload.RequestType())) + + pipeReader, pipeWriter := io.Pipe() + request.Body = pipeReader + go func() { + // No need to wait on this because the http client will close the pipeReader which will close the pipeWriter and finish the goroutine + pipeWriter.CloseWithError(json.NewEncoder(pipeWriter).Encode(w.body)) + }() + + return request +} + +// SumReaderCloser is a ReadCloser that wraps another ReadCloser and counts the number of bytes read. +type SumReaderCloser struct { + io.ReadCloser + n int +} + +func (s *SumReaderCloser) Read(p []byte) (n int, err error) { + n, err = s.ReadCloser.Read(p) + s.n += n + return +} + +// WriterStatusCodeError is an error that is returned when the writer receives an unexpected status code from the server. 
+type WriterStatusCodeError struct { + Status string + Body string +} + +func (w *WriterStatusCodeError) Error() string { + return fmt.Sprintf("unexpected status code: %q (received body: %q)", w.Status, w.Body) +} + +func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, error) { + w.mu.Lock() + defer w.mu.Unlock() + + var results []EndpointRequestResult + for _, endpoint := range w.endpoints { + var ( + request = w.newRequest(endpoint, payload) + sumReaderCloser = &SumReaderCloser{ReadCloser: request.Body} + now = time.Now() + ) + + request.Body = sumReaderCloser + response, err := w.httpClient.Do(request) + if err != nil { + results = append(results, EndpointRequestResult{Error: err}) + continue + } + + // We only have a few endpoints, so we can afford to keep the response body stream open until we are done with it + defer response.Body.Close() + + if response.StatusCode >= 300 || response.StatusCode < 200 { + respBodyBytes, _ := io.ReadAll(io.LimitReader(response.Body, 256)) // maybe we can find an error reason in the response body + results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ + Status: response.Status, + Body: string(respBodyBytes), + }, StatusCode: response.StatusCode}) + continue + } + + results = append(results, EndpointRequestResult{ + PayloadByteSize: sumReaderCloser.n, + CallDuration: time.Since(now), + StatusCode: response.StatusCode, + }) + + // We succeeded, no need to try the other endpoints + break + } + + var err error + if results[len(results)-1].Error != nil { + var errs []error + for _, result := range results { + errs = append(errs, result.Error) + } + err = errors.Join(errs...) + } + + return results, err +} + +// RecordWriter is a Writer that stores the payloads in memory. 
Used for testing purposes +type RecordWriter struct { + mu sync.Mutex + payloads []transport.Payload +} + +func (w *RecordWriter) Flush(payload transport.Payload) ([]EndpointRequestResult, error) { + w.mu.Lock() + defer w.mu.Unlock() + w.payloads = append(w.payloads, payload) + return []EndpointRequestResult{ + { + PayloadByteSize: 1, + CallDuration: time.Nanosecond, + }, + }, nil +} + +func (w *RecordWriter) Payloads() []transport.Payload { + w.mu.Lock() + defer w.mu.Unlock() + copyPayloads := make([]transport.Payload, len(w.payloads)) + copy(copyPayloads, w.payloads) + return copyPayloads +} + +var _ Writer = (*RecordWriter)(nil) diff --git a/internal/newtelemetry/internal/writer_test.go b/internal/newtelemetry/internal/writer_test.go new file mode 100644 index 0000000000..bacf6accf7 --- /dev/null +++ b/internal/newtelemetry/internal/writer_test.go @@ -0,0 +1,213 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package internal + +import ( + "net/http" + "net/http/httptest" + "net/url" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func TestNewWriter_ValidConfig(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + Endpoints: []*http.Request{ + {Method: http.MethodPost, URL: &url.URL{Scheme: "http", Host: "localhost", Path: "/telemetry"}}, + }, + } + + writer, err := NewWriter(config) + assert.NoError(t, err) + assert.NotNil(t, writer) +} + +func TestNewWriter_NoEndpoints(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + Endpoints: []*http.Request{}, + } + + writer, err := NewWriter(config) + assert.Error(t, err) + assert.Nil(t, writer) +} + +type testPayload struct { + RequestTypeValue transport.RequestType `json:"request_type"` + marshalJSON func() ([]byte, error) +} + +func (p *testPayload) MarshalJSON() ([]byte, error) { + return p.marshalJSON() +} + +func (p *testPayload) RequestType() transport.RequestType { + return p.RequestTypeValue +} + +func TestWriter_Flush_Success(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + marshalJSONCalled bool + payloadReceived bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled = true + return []byte(`{"request_type":"test"}`), nil + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived = true + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + req, err := http.NewRequest(http.MethodPost, server.URL, nil) + require.NoError(t, err) + + config.Endpoints = 
append(config.Endpoints, req) + writer, _ := NewWriter(config) + + bytesSent, err := writer.Flush(&payload) + require.NoError(t, err) + + assert.NotZero(t, bytesSent) + assert.True(t, marshalJSONCalled) + assert.True(t, payloadReceived) +} + +func TestWriter_Flush_Failure(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + marshalJSONCalled bool + payloadReceived bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled = true + return []byte(`{"request_type":"test"}`), nil + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived = true + w.WriteHeader(http.StatusBadRequest) + })) + defer server.Close() + + req, err := http.NewRequest(http.MethodPost, server.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req) + writer, _ := NewWriter(config) + + results, err := writer.Flush(&payload) + require.Error(t, err) + assert.Len(t, results, 1) + assert.Equal(t, http.StatusBadRequest, results[0].StatusCode) + assert.ErrorContains(t, err, `400 Bad Request`) + assert.True(t, marshalJSONCalled) + assert.True(t, payloadReceived) +} + +func TestWriter_Flush_MultipleEndpoints(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + marshalJSONCalled int + payloadReceived1 bool + payloadReceived2 bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled++ + return []byte(`{"request_type":"test"}`), nil + }, + } + + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived1 = true + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + assert.True(t, payloadReceived1) + payloadReceived2 = true + w.WriteHeader(http.StatusOK) + })) + defer server2.Close() + + req1, err := http.NewRequest(http.MethodPost, server1.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req1) + req2, err := http.NewRequest(http.MethodPost, server2.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req2) + writer, _ := NewWriter(config) + + results, err := writer.Flush(&payload) + assert.NoError(t, err) + + assert.Len(t, results, 2) + assert.Equal(t, http.StatusInternalServerError, results[0].StatusCode) + assert.ErrorContains(t, results[0].Error, `500 Internal Server Error`) + assert.Equal(t, time.Duration(0), results[0].CallDuration) + assert.Zero(t, results[0].PayloadByteSize) + + assert.Equal(t, http.StatusOK, results[1].StatusCode) + assert.InDelta(t, time.Duration(1), results[1].CallDuration, float64(time.Second)) + assert.NotZero(t, results[1].PayloadByteSize) + assert.NoError(t, results[1].Error) + + assert.Equal(t, 2, marshalJSONCalled) + assert.True(t, payloadReceived2) +} diff --git a/internal/newtelemetry/log/log.go b/internal/newtelemetry/log/log.go new file mode 100644 index 0000000000..eabe43e9e1 --- /dev/null +++ b/internal/newtelemetry/log/log.go @@ -0,0 +1,49 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package log + +import ( + "fmt" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" +) + +func divideArgs(args []any) ([]newtelemetry.LogOption, []any) { + if len(args) == 0 { + return nil, nil + } + + var options []newtelemetry.LogOption + var fmtArgs []any + for _, arg := range args { + if opt, ok := arg.(newtelemetry.LogOption); ok { + options = append(options, opt) + } else { + fmtArgs = append(fmtArgs, arg) + } + } + return options, fmtArgs +} + +// Debug sends a telemetry payload with a debug log message to the backend. +func Debug(format string, args ...any) { + log(newtelemetry.LogDebug, format, args) +} + +// Warn sends a telemetry payload with a warning log message to the backend. +func Warn(format string, args ...any) { + log(newtelemetry.LogWarn, format, args) +} + +// Error sends a telemetry payload with an error log message to the backend. +func Error(format string, args ...any) { + log(newtelemetry.LogError, format, args) +} + +func log(lvl newtelemetry.LogLevel, format string, args []any) { + opts, fmtArgs := divideArgs(args) + newtelemetry.Log(lvl, fmt.Sprintf(format, fmtArgs...), opts...) +} diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go new file mode 100644 index 0000000000..b9a424d7d4 --- /dev/null +++ b/internal/newtelemetry/logger.go @@ -0,0 +1,102 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "runtime" + "strings" + "sync/atomic" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// WithTags returns a LogOption that sets the tags for the telemetry log message. Tags are key-value pairs that are then +// serialized into a simple "key:value,key2:value2" format. 
No quoting or escaping is performed. +func WithTags(tags []string) LogOption { + compiledTags := strings.Join(tags, ",") + return func(key *loggerKey, _ *loggerValue) { + if key == nil { + return + } + key.tags = compiledTags + } +} + +// WithStacktrace returns a LogOption that sets the stacktrace for the telemetry log message. The stacktrace is a string +// that is generated inside the WithStacktrace function. Logs demultiplication does not take the stacktrace into account. +// This means that a log that has been demultiplicated will only show of the first log. +func WithStacktrace() LogOption { + buf := make([]byte, 4_096) + buf = buf[:runtime.Stack(buf, false)] + return func(_ *loggerKey, value *loggerValue) { + if value == nil { + return + } + value.stacktrace = string(buf) + } +} + +type loggerKey struct { + tags string + message string + level LogLevel +} + +type loggerValue struct { + count atomic.Uint32 + stacktrace string + time int64 // Unix timestamp +} + +type logger struct { + store internal.SyncMap[loggerKey, *loggerValue] +} + +func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { + key := loggerKey{ + message: text, + level: level, + } + + for _, opt := range opts { + opt(&key, nil) + } + + value, loaded := logger.store.LoadOrStore(key, &loggerValue{}) + if !loaded { + // If we were the first to store the value, we need to set the time and apply the options + value.time = time.Now().Unix() + for _, opt := range opts { + opt(nil, value) + } + } + + value.count.Add(1) +} + +func (logger *logger) Payload() transport.Payload { + var logs []transport.LogMessage + logger.store.Range(func(key loggerKey, value *loggerValue) bool { + logger.store.Delete(key) + logs = append(logs, transport.LogMessage{ + Message: key.message, + Level: key.level, + Tags: key.tags, + Count: value.count.Load(), + StackTrace: value.stacktrace, + TracerTime: value.time, + }) + return true + }) + + if len(logs) == 0 { + return nil + } + + return 
transport.Logs{Logs: logs} +} diff --git a/internal/newtelemetry/metrichandle.go b/internal/newtelemetry/metrichandle.go new file mode 100644 index 0000000000..a191c953f2 --- /dev/null +++ b/internal/newtelemetry/metrichandle.go @@ -0,0 +1,70 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "math" + "sync" + "sync/atomic" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" +) + +// noopMetricHandle is a no-op implementation of a metric handle. +type noopMetricHandle struct{} + +func (noopMetricHandle) Submit(_ float64) {} + +func (noopMetricHandle) Get() float64 { + return math.NaN() +} + +var metricLogLossOnce sync.Once + +// swappableMetricHandle is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. 
+type swappableMetricHandle struct { + ptr atomic.Pointer[MetricHandle] + recorder internal.Recorder[MetricHandle] + maker func(client Client) MetricHandle +} + +func (t *swappableMetricHandle) Submit(value float64) { + if Disabled() { + return + } + + inner := t.ptr.Load() + if inner == nil || *inner == nil { + if !t.recorder.Record(func(handle MetricHandle) { + handle.Submit(value) + }) { + metricLogLossOnce.Do(func() { + log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss") + }) + } + return + } + + (*inner).Submit(value) +} + +func (t *swappableMetricHandle) Get() float64 { + inner := t.ptr.Load() + if inner == nil || *inner == nil { + return 0 + } + + return (*inner).Get() +} + +func (t *swappableMetricHandle) swap(handle MetricHandle) { + if t.ptr.Swap(&handle) == nil { + t.recorder.Replay(handle) + } +} + +var _ MetricHandle = (*swappableMetricHandle)(nil) diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go new file mode 100644 index 0000000000..0ab4c28cc0 --- /dev/null +++ b/internal/newtelemetry/metrics.go @@ -0,0 +1,262 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "fmt" + "math" + "sort" + "strings" + "sync/atomic" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// metricKey is used as a key in the metrics store hash map. 
+type metricKey struct { + namespace Namespace + kind transport.MetricType + name string + tags string +} + +func (k metricKey) SplitTags() []string { + if k.tags == "" { + return nil + } + return strings.Split(k.tags, ",") +} + +func validateMetricKey(namespace Namespace, kind transport.MetricType, name string, tags []string) error { + if len(name) == 0 { + return fmt.Errorf("metric name with tags %v should be empty", tags) + } + + if !knownmetrics.IsKnownMetric(namespace, kind, name) { + return fmt.Errorf("metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that you wrote the name correctly. "+ + "The metric will still be sent", name, string(kind), namespace) + } + + for _, tag := range tags { + if len(tag) == 0 { + return fmt.Errorf("metric %q has should not have empty tags", name) + } + + if strings.Contains(tag, ",") { + return fmt.Errorf("metric %q tag %q should not contain commas", name, tag) + } + } + + return nil +} + +// newMetricKey returns a new metricKey with the given parameters with the tags sorted and joined by commas. +func newMetricKey(namespace Namespace, kind transport.MetricType, name string, tags []string) metricKey { + sort.Strings(tags) + return metricKey{namespace: namespace, kind: kind, name: name, tags: strings.Join(tags, ",")} +} + +// metricsHandle is the internal equivalent of MetricHandle for Count/Rate/Gauge metrics that are sent via the payload [transport.GenerateMetrics]. +type metricHandle interface { + MetricHandle + Payload() transport.MetricData +} + +type metrics struct { + store internal.SyncMap[metricKey, metricHandle] + pool *internal.SyncPool[*metricPoint] + skipAllowlist bool // Debugging feature to skip the allowlist of known metrics +} + +// LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. 
+func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags []string) MetricHandle { + + var ( + key = newMetricKey(namespace, kind, name, tags) + handle MetricHandle + loaded bool + ) + switch kind { + case transport.CountMetric: + handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key, pool: m.pool}}) + case transport.GaugeMetric: + handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key, pool: m.pool}}) + case transport.RateMetric: + handle, loaded = m.store.LoadOrStore(key, &rate{count: count{metric: metric{key: key, pool: m.pool}}}) + if !loaded { + // Initialize the interval start for rate metrics + r := handle.(*rate) + now := time.Now() + r.intervalStart.Store(&now) + } + default: + log.Warn("telemetry: unknown metric type %q", kind) + return nil + } + + if !loaded { // The metric is new: validate and log issues about it + if err := validateMetricKey(namespace, kind, name, tags); err != nil { + log.Warn("telemetry: %v", err) + } + } + + return handle +} + +func (m *metrics) Payload() transport.Payload { + series := make([]transport.MetricData, 0, m.store.Len()) + m.store.Range(func(_ metricKey, handle metricHandle) bool { + if payload := handle.Payload(); payload.Type != "" { + series = append(series, payload) + } + return true + }) + + if len(series) == 0 { + return nil + } + + return transport.GenerateMetrics{Series: series, SkipAllowlist: m.skipAllowlist} +} + +type metricPoint struct { + value float64 + time time.Time +} + +// metric is a meta t +type metric struct { + key metricKey + ptr atomic.Pointer[metricPoint] + pool *internal.SyncPool[*metricPoint] +} + +func (m *metric) Get() float64 { + if ptr := m.ptr.Load(); ptr != nil { + return ptr.value + } + + return math.NaN() +} + +func (m *metric) Payload() transport.MetricData { + point := m.ptr.Swap(nil) + if point == nil { + return transport.MetricData{} + } + defer m.pool.Put(point) + return m.payload(point) +} + +func (m 
*metric) payload(point *metricPoint) transport.MetricData { + if point == nil { + return transport.MetricData{} + } + + return transport.MetricData{ + Metric: m.key.name, + Namespace: m.key.namespace, + Tags: m.key.SplitTags(), + Type: m.key.kind, + Common: knownmetrics.IsCommonMetric(m.key.namespace, m.key.kind, m.key.name), + Points: [][2]any{ + {point.time.Unix(), point.value}, + }, + } +} + +// count is a metric that represents a single value that is incremented over time and flush and reset at zero when flushed +type count struct { + metric +} + +func (m *count) Submit(newValue float64) { + newPoint := m.pool.Get() + newPoint.time = time.Now() + for { + oldPoint := m.ptr.Load() + var oldValue float64 + if oldPoint != nil { + oldValue = oldPoint.value + } + newPoint.value = oldValue + newValue + if m.ptr.CompareAndSwap(oldPoint, newPoint) { + if oldPoint != nil { + m.pool.Put(oldPoint) + } + return + } + } +} + +// gauge is a metric that represents a single value at a point in time that is not incremental +type gauge struct { + metric +} + +func (g *gauge) Submit(value float64) { + newPoint := g.pool.Get() + newPoint.time = time.Now() + newPoint.value = value + for { + oldPoint := g.ptr.Load() + if g.ptr.CompareAndSwap(oldPoint, newPoint) { + if oldPoint != nil { + g.pool.Put(oldPoint) + } + return + } + } +} + +// rate is like a count metric but the value sent is divided by an interval of time that is also sent/ +type rate struct { + count + intervalStart atomic.Pointer[time.Time] +} + +func (r *rate) Get() float64 { + sum := r.count.Get() + intervalStart := r.intervalStart.Load() + if intervalStart == nil { + return math.NaN() + } + + intervalSeconds := time.Since(*intervalStart).Seconds() + if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong + return math.NaN() + } + + return sum / intervalSeconds +} + +func (r *rate) Payload() transport.MetricData { + now := time.Now() + intervalStart 
:= r.intervalStart.Swap(&now) + if intervalStart == nil { + return transport.MetricData{} + } + + intervalSeconds := time.Since(*intervalStart).Seconds() + if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong + return transport.MetricData{} + } + + point := r.ptr.Swap(nil) + if point == nil { + return transport.MetricData{} + } + defer r.pool.Put(point) + + point.value /= intervalSeconds + payload := r.metric.payload(point) + payload.Interval = int64(intervalSeconds) + return payload +} diff --git a/internal/newtelemetry/product.go b/internal/newtelemetry/product.go new file mode 100644 index 0000000000..84c53e8242 --- /dev/null +++ b/internal/newtelemetry/product.go @@ -0,0 +1,51 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package newtelemetry + +import ( + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type products struct { + mu sync.Mutex + products map[Namespace]transport.Product +} + +func (p *products) Add(namespace Namespace, enabled bool, err error) { + p.mu.Lock() + defer p.mu.Unlock() + if p.products == nil { + p.products = make(map[Namespace]transport.Product) + } + + product := transport.Product{ + Enabled: enabled, + } + + if err != nil { + product.Error = transport.Error{ + Message: err.Error(), + } + } + + p.products[namespace] = product +} + +func (p *products) Payload() transport.Payload { + p.mu.Lock() + defer p.mu.Unlock() + if len(p.products) == 0 { + return nil + } + + res := transport.AppProductChange{ + Products: p.products, + } + p.products = nil + return res +} diff --git a/internal/newtelemetry/telemetrytest/globalclient_test.go b/internal/newtelemetry/telemetrytest/globalclient_test.go new file mode 100644 index 0000000000..1f6b86983b --- /dev/null +++ b/internal/newtelemetry/telemetrytest/globalclient_test.go @@ -0,0 +1,130 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package telemetrytest + +import ( + "math" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func TestGlobalClient(t *testing.T) { + t.Run("config", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.RegisterAppConfig("key", "value", newtelemetry.OriginCode) + assert.Len(t, recorder.Configuration, 1) + assert.Equal(t, "key", recorder.Configuration[0].Name) + assert.Equal(t, "value", recorder.Configuration[0].Value) + assert.Equal(t, newtelemetry.OriginCode, recorder.Configuration[0].Origin) + }) + + t.Run("configs", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.RegisterAppConfigs(newtelemetry.Configuration{Name: "key", Value: "value", Origin: newtelemetry.OriginCode}, newtelemetry.Configuration{Name: "key2", Value: "value2", Origin: newtelemetry.OriginRemoteConfig}) + assert.Len(t, recorder.Configuration, 2) + assert.Equal(t, "key", recorder.Configuration[0].Name) + assert.Equal(t, "value", recorder.Configuration[0].Value) + assert.Equal(t, newtelemetry.OriginCode, recorder.Configuration[0].Origin) + assert.Equal(t, "key2", recorder.Configuration[1].Name) + assert.Equal(t, "value2", recorder.Configuration[1].Value) + assert.Equal(t, newtelemetry.OriginRemoteConfig, recorder.Configuration[1].Origin) + }) + + t.Run("app-stop", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.StopApp() + assert.True(t, recorder.Stopped) + }) + + t.Run("product-start", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.ProductStarted(newtelemetry.NamespaceAppSec) + assert.Len(t, recorder.Products, 1) + assert.True(t, 
recorder.Products[newtelemetry.NamespaceAppSec]) + }) + + t.Run("product-stopped", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.ProductStopped(newtelemetry.NamespaceAppSec) + assert.Len(t, recorder.Products, 1) + assert.False(t, recorder.Products[newtelemetry.NamespaceAppSec]) + }) + + t.Run("integration-loaded", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.LoadIntegration("test-integration") + assert.Len(t, recorder.Integrations, 1) + assert.Equal(t, "test-integration", recorder.Integrations[0].Name) + }) + + t.Run("mark-integration-as-loaded", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.MarkIntegrationAsLoaded(newtelemetry.Integration{Name: "test-integration", Version: "1.0.0"}) + assert.Len(t, recorder.Integrations, 1) + assert.Equal(t, "test-integration", recorder.Integrations[0].Name) + assert.Equal(t, "1.0.0", recorder.Integrations[0].Version) + }) + + t.Run("count", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Count(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.CountMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.CountMetric)}].Get()) + }) + + t.Run("gauge", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Gauge(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: 
string(transport.GaugeMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.GaugeMetric)}].Get()) + }) + + t.Run("rate", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Rate(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.RateMetric)}) + assert.False(t, math.IsNaN(recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.RateMetric)}].Get())) + }) + + t.Run("distribution", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Distribution(newtelemetry.NamespaceGeneral, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceGeneral, Kind: string(transport.DistMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceGeneral, Kind: string(transport.DistMetric)}].Get()) + }) +} diff --git a/internal/newtelemetry/telemetrytest/mock.go b/internal/newtelemetry/telemetrytest/mock.go new file mode 100644 index 0000000000..fdf9855084 --- /dev/null +++ b/internal/newtelemetry/telemetrytest/mock.go @@ -0,0 +1,93 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +// Package telemetrytest provides a mock implementation of the telemetry client for testing purposes +package telemetrytest + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + + "github.com/stretchr/testify/mock" +) + +// MockClient implements Client and is used for testing purposes outside the telemetry package, +// e.g. the tracer and profiler. +type MockClient struct { + mock.Mock +} + +func (m *MockClient) Close() error { + return nil +} + +type MockMetricHandle struct { + mock.Mock +} + +func (m *MockMetricHandle) Submit(value float64) { + m.Called(value) +} + +func (m *MockMetricHandle) Get() float64 { + return m.Called().Get(0).(float64) +} + +func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Log(level newtelemetry.LogLevel, text string, options ...newtelemetry.LogOption) { + m.Called(level, text, options) +} + +func (m *MockClient) ProductStarted(product newtelemetry.Namespace) { + m.Called(product) +} + +func (m *MockClient) ProductStopped(product newtelemetry.Namespace) { + m.Called(product) +} + +func (m *MockClient) ProductStartError(product newtelemetry.Namespace, err error) { + m.Called(product, err) +} + +func (m *MockClient) RegisterAppConfig(key string, value any, 
origin newtelemetry.Origin) { + m.Called(key, value, origin) +} + +func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { + m.Called(kvs) +} + +func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { + m.Called(integration) +} + +func (m *MockClient) Flush() { + m.Called() +} + +func (m *MockClient) AppStart() { + m.Called() +} + +func (m *MockClient) AppStop() { + m.Called() +} + +var _ newtelemetry.Client = (*MockClient)(nil) diff --git a/internal/newtelemetry/telemetrytest/record.go b/internal/newtelemetry/telemetrytest/record.go new file mode 100644 index 0000000000..b73d398f54 --- /dev/null +++ b/internal/newtelemetry/telemetrytest/record.go @@ -0,0 +1,199 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package telemetrytest + +import ( + "reflect" + "strings" + "sync" + "testing" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type MetricKey struct { + Namespace newtelemetry.Namespace + Name string + Tags string + Kind string +} + +type RecordClient struct { + mu sync.Mutex + Started bool + Stopped bool + Configuration []newtelemetry.Configuration + Logs map[newtelemetry.LogLevel]string + Integrations []newtelemetry.Integration + Products map[newtelemetry.Namespace]bool + Metrics map[MetricKey]*RecordMetricHandle +} + +func (r *RecordClient) Close() error { + return nil +} + +type RecordMetricHandle struct { + mu sync.Mutex + count float64 + rate float64 + rateStart time.Time + gauge float64 + distrib []float64 + + submit func(handle *RecordMetricHandle, value float64) + get func(handle *RecordMetricHandle) float64 +} + +func (m *RecordMetricHandle) Submit(value float64) { + m.mu.Lock() + defer m.mu.Unlock() + 
m.submit(m, value) +} + +func (m *RecordMetricHandle) Get() float64 { + m.mu.Lock() + defer m.mu.Unlock() + return m.get(m) +} + +func (r *RecordClient) metric(kind string, namespace newtelemetry.Namespace, name string, tags []string, submit func(handle *RecordMetricHandle, value float64), get func(handle *RecordMetricHandle) float64) *RecordMetricHandle { + r.mu.Lock() + defer r.mu.Unlock() + if r.Metrics == nil { + r.Metrics = make(map[MetricKey]*RecordMetricHandle) + } + + key := MetricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: kind} + if _, ok := r.Metrics[key]; !ok { + r.Metrics[key] = &RecordMetricHandle{submit: submit, get: get} + } + return r.Metrics[key] +} + +func (r *RecordClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return r.metric(string(transport.CountMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.count += value + }, func(handle *RecordMetricHandle) float64 { + return handle.count + }) +} + +func (r *RecordClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + handle := r.metric(string(transport.RateMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.count += value + handle.rate = float64(handle.count) / time.Since(handle.rateStart).Seconds() + }, func(handle *RecordMetricHandle) float64 { + return handle.rate + }) + + handle.rateStart = time.Now() + return handle +} + +func (r *RecordClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return r.metric(string(transport.GaugeMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.gauge = value + }, func(handle *RecordMetricHandle) float64 { + return handle.gauge + }) +} + +func (r *RecordClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle 
{ + return r.metric(string(transport.DistMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.distrib = append(handle.distrib, value) + }, func(handle *RecordMetricHandle) float64 { + var sum float64 + for _, v := range handle.distrib { + sum += v + } + return sum + }) +} + +func (r *RecordClient) Log(level newtelemetry.LogLevel, text string, _ ...newtelemetry.LogOption) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Logs == nil { + r.Logs = make(map[newtelemetry.LogLevel]string) + } + + r.Logs[level] = text +} + +func (r *RecordClient) ProductStarted(product newtelemetry.Namespace) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = true +} + +func (r *RecordClient) ProductStopped(product newtelemetry.Namespace) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = false +} + +func (r *RecordClient) ProductStartError(product newtelemetry.Namespace, _ error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = false +} + +func (r *RecordClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { + r.mu.Lock() + defer r.mu.Unlock() + r.Configuration = append(r.Configuration, newtelemetry.Configuration{Name: key, Value: value, Origin: origin}) +} + +func (r *RecordClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { + r.mu.Lock() + defer r.mu.Unlock() + r.Configuration = append(r.Configuration, kvs...) 
+} + +func (r *RecordClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { + r.mu.Lock() + defer r.mu.Unlock() + r.Integrations = append(r.Integrations, integration) +} + +func (r *RecordClient) Flush() {} + +func (r *RecordClient) AppStart() { + r.mu.Lock() + defer r.mu.Unlock() + r.Started = true +} + +func (r *RecordClient) AppStop() { + r.mu.Lock() + defer r.mu.Unlock() + r.Stopped = true +} + +func CheckConfig(t *testing.T, cfgs []newtelemetry.Configuration, key string, value any) { + for _, c := range cfgs { + if c.Name == key && reflect.DeepEqual(c.Value, value) { + return + } + } + + t.Fatalf("could not find configuration key %s with value %v", key, value) +}