From 7d9896db37826fcfa6678f6ad9f45eed19946332 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 18 Oct 2024 17:47:32 +0200 Subject: [PATCH 01/61] setup internal/newtelemetry Signed-off-by: Eliott Bouhana wip Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 105 +++++++ internal/newtelemetry/config.go | 14 + internal/newtelemetry/defaults.go | 40 +++ internal/newtelemetry/hot_pointer.go | 42 +++ internal/newtelemetry/message.go | 309 +++++++++++++++++++++ internal/newtelemetry/payload.go | 10 + internal/newtelemetry/requesy_body_pool.go | 69 +++++ internal/newtelemetry/ringbuffer.go | 103 +++++++ 8 files changed, 692 insertions(+) create mode 100644 internal/newtelemetry/api.go create mode 100644 internal/newtelemetry/config.go create mode 100644 internal/newtelemetry/defaults.go create mode 100644 internal/newtelemetry/hot_pointer.go create mode 100644 internal/newtelemetry/message.go create mode 100644 internal/newtelemetry/payload.go create mode 100644 internal/newtelemetry/requesy_body_pool.go create mode 100644 internal/newtelemetry/ringbuffer.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go new file mode 100644 index 0000000000..115a4a5003 --- /dev/null +++ b/internal/newtelemetry/api.go @@ -0,0 +1,105 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "net/http" +) + +type ClientConfig struct { + // AgentlessURL is the full URL to the agentless telemetry endpoint. + // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry + AgentlessURL string + + // AgentURL is the url to the agent without the path + AgentURL string + + // APIKey is the API key to use for sending telemetry, defaults to the env var DD_API_KEY. + APIKey string + + // HTTPClient is the http client to use for sending telemetry, defaults to http.DefaultClient. + HTTPClient http.RoundTripper +} + +// NewClient creates a new telemetry client with the given service, environment, and version and config. +func NewClient(service, env, version string, config ClientConfig) (Client, error) { + return nil, nil +} + +// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global client. +func StartApp(client Client) error { + return nil +} + +// StopApp creates the app-stopped telemetry, adding to the queue and flush all the queue before stopping the client. +func StopApp() { +} + +// MetricHandle is used to reduce lock contention when submitting metrics. +// This can also be used ephemerally to submit a single metric value like this: +// +// telemetry.Metric(telemetry.Appsec, "my-count").Submit(1.0, []string{"tag1:true", "tag2:1.0"}) +type MetricHandle interface { + Submit(value float64, tags []string) + + flush() +} + +// Logger is the interface i +type Logger interface { + // WithTags creates a new Logger which will send a comma-separated list of tags with the next logs + WithTags(tags string) Logger + + // WithStackTrace creates a new Logger which will send a stack trace generated for each next log. 
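+	// A chained call could look like this (illustrative sketch; "client" is assumed to be a Client):
+	//
+	//	client.Logger().WithTags("product:appsec").WithStackTrace("").Error("request blocked")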
+ WithStackTrace(tags string) Logger + + // Error sends a telemetry log at the ERROR level + Error(text string) + + // Warn sends a telemetry log at the WARN level + Warn(text string) + + // Debug sends a telemetry log at the DEBUG level + Debug(text string) +} + +// Client constitutes all the functions available concurrently for the telemetry users. +type Client interface { + // Count creates a new metric handle for the given namespace and name that can be used to submit values. + Count(namespace Namespace, name string) MetricHandle + + // Rate creates a new metric handle for the given namespace and name that can be used to submit values. + Rate(namespace Namespace, name string) MetricHandle + + // Gauge creates a new metric handle for the given namespace and name that can be used to submit values. + Gauge(namespace Namespace, name string) MetricHandle + + // Distribution creates a new metric handle for the given namespace and name that can be used to submit values. + Distribution(namespace Namespace, name string) MetricHandle + + // Logger returns an implementation of the Logger interface which sends telemetry logs. + Logger() Logger + + // ProductOnOff sent the telemetry necessary to signal that a product is enabled/disabled. + ProductOnOff(product Namespace, enabled bool) + + // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry + // value has to be json serializable and the origin is the source of the change. + AddAppConfig(key string, value any, origin Origin) + + // AddBulkAppConfig adds a list of key value pairs to the app configuration and send the change to telemetry. + // Same as AddAppConfig but for multiple values. + AddBulkAppConfig(kvs []Configuration) + + // flush closes the client and flushes any remaining data. + flush() + + // appStart sends the telemetry necessary to signal that the app is starting. + appStart() + + // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() + appStop() +} diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go new file mode 100644 index 0000000000..f09fd791b9 --- /dev/null +++ b/internal/newtelemetry/config.go @@ -0,0 +1,14 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +type InternalConfig struct { + Enabled bool + HeartbeatInterval int + DependencyCollectionEnabled bool + MetricsEnabled bool + LogsEnabled bool +} diff --git a/internal/newtelemetry/defaults.go b/internal/newtelemetry/defaults.go new file mode 100644 index 0000000000..498c9d2e9b --- /dev/null +++ b/internal/newtelemetry/defaults.go @@ -0,0 +1,40 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "net" + "net/http" + "time" +) + +var ( + // We copy the transport to avoid using the default one, as it might be + // augmented with tracing and we don't want these calls to be recorded. + // See https://golang.org/pkg/net/http/#DefaultTransport . 
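+	// The transport settings below mirror http.DefaultTransport; the overall
+	// request timeout for telemetry calls is capped at 5 seconds.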
+ defaultHTTPClient = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + }, + Timeout: 5 * time.Second, + } + + // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is + // also the default URL in case connecting to the agent URL fails. + agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" + + // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. + defaultHeartbeatInterval = 60.0 +) diff --git a/internal/newtelemetry/hot_pointer.go b/internal/newtelemetry/hot_pointer.go new file mode 100644 index 0000000000..b8383f45ac --- /dev/null +++ b/internal/newtelemetry/hot_pointer.go @@ -0,0 +1,42 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "sync" +) + +// hotPointer is a type that allows for atomic swapping of a value while keeping the other on standby to prevent allocations. +type hotPointer[T any] struct { + // value is the current value that can be extracted. + value *T + // standby is the value that will be swapped in. + standby *T + // writeMu is used to lock the value. + writeMu sync.Mutex + // swapMu is used to lock the swap. + swapMu sync.Mutex +} + +// Lock take the lock and return the current value +func (hp *hotPointer[T]) Lock() *T { + hp.writeMu.Lock() + return hp.value +} + +// StandbyValue returns the standby value WITHOUT locking. Which means it cannot be used concurrently. +func (hp *hotPointer[T]) StandbyValue() *T { + return hp.standby +} + +// Swap swaps the current value with the standby value and return the standby value using the lock. +// the value returned is NOT protected by the lock. +func (hp *hotPointer[T]) Swap() *T { + hp.Lock() + defer hp.Unlock() + hp.value, hp.standby = hp.standby, hp.value + return hp.standby +} diff --git a/internal/newtelemetry/message.go b/internal/newtelemetry/message.go new file mode 100644 index 0000000000..e8c5f12148 --- /dev/null +++ b/internal/newtelemetry/message.go @@ -0,0 +1,309 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2022 Datadog, Inc. 
+ +package newtelemetry + +import ( + "bytes" + "fmt" + "net/http" +) + +// Request captures all necessary information for a telemetry event submission +type Request struct { + Body *Body + Header *http.Header + HTTPClient *http.Client + URL string +} + +// Body is the common high-level structure encapsulating a telemetry request body +type Body struct { + APIVersion string `json:"api_version"` + RequestType RequestType `json:"request_type"` + TracerTime int64 `json:"tracer_time"` + RuntimeID string `json:"runtime_id"` + SeqID int64 `json:"seq_id"` + Debug bool `json:"debug"` + Payload interface{} `json:"payload"` + Application Application `json:"application"` + Host Host `json:"host"` +} + +// RequestType determines how the Payload of a request should be handled +type RequestType string + +const ( + // RequestTypeAppStarted is the first message sent by the telemetry + // client, containing the configuration loaded at startup + RequestTypeAppStarted RequestType = "app-started" + // RequestTypeAppHeartbeat is sent periodically by the client to indicate + // that the app is still running + RequestTypeAppHeartbeat RequestType = "app-heartbeat" + // RequestTypeGenerateMetrics contains count, gauge, or rate metrics accumulated by the + // client, and is sent periodically along with the heartbeat + RequestTypeGenerateMetrics RequestType = "generate-metrics" + // RequestTypeDistributions is to send distribution type metrics accumulated by the + // client, and is sent periodically along with the heartbeat + RequestTypeDistributions RequestType = "distributions" + // RequestTypeAppClosing is sent when the telemetry client is stopped + RequestTypeAppClosing RequestType = "app-closing" + // RequestTypeDependenciesLoaded is sent if DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED + // is enabled. Sent when Start is called for the telemetry client. + RequestTypeDependenciesLoaded RequestType = "app-dependencies-loaded" + // RequestTypeAppClientConfigurationChange is sent if there are changes + // to the client library configuration + RequestTypeAppClientConfigurationChange RequestType = "app-client-configuration-change" + // RequestTypeAppProductChange is sent when products are enabled/disabled + RequestTypeAppProductChange RequestType = "app-product-change" + // RequestTypeAppIntegrationsChange is sent when the telemetry client starts + // with info on which integrations are used. 
+ RequestTypeAppIntegrationsChange RequestType = "app-integrations-change" +) + +// Namespace describes an APM product to distinguish telemetry coming from +// different products used by the same application +type Namespace string + +const ( + NamespaceGeneral Namespace = "general" + NamespaceTracers Namespace = "tracers" + NamespaceProfilers Namespace = "profilers" + NamespaceAppSec Namespace = "appsec" + NamespaceIAST Namespace = "iast" + NamespaceTelemetry Namespace = "telemetry" +) + +// MetricType is the type of metric being sent, either count, gauge, rate or distribution +type MetricType string + +const ( + RateMetric MetricType = "rate" + CountMetric MetricType = "count" + GaugeMetric MetricType = "gauge" + DistributionMetric MetricType = "distribution" +) + +// Application is identifying information about the app itself +type Application struct { + ServiceName string `json:"service_name"` + Env string `json:"env"` + ServiceVersion string `json:"service_version"` + TracerVersion string `json:"tracer_version"` + LanguageName string `json:"language_name"` + LanguageVersion string `json:"language_version"` +} + +// Host is identifying information about the host on which the app +// is running +type Host struct { + Hostname string `json:"hostname"` + OS string `json:"os"` + OSVersion string `json:"os_version,omitempty"` + Architecture string `json:"architecture"` + KernelName string `json:"kernel_name"` + KernelRelease string `json:"kernel_release"` + KernelVersion string `json:"kernel_version"` +} + +// AppStarted corresponds to the "app-started" request type +type AppStarted struct { + Configuration []Configuration `json:"configuration,omitempty"` + Products Products `json:"products,omitempty"` + AdditionalPayload []AdditionalPayload `json:"additional_payload,omitempty"` + Error Error `json:"error,omitempty"` + RemoteConfig *RemoteConfig `json:"remote_config,omitempty"` +} + +// IntegrationsChange corresponds to the app-integrations-change requesty type +type IntegrationsChange struct { + Integrations []Integration `json:"integrations"` +} + +// Integration is an integration that is configured to be traced automatically. 
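+// For example (illustrative values only), an entry could be
+// {Name: "net/http", Version: "1.23.0", Enabled: true, AutoEnabled: true, Compatible: true}.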
+type Integration struct { + Name string `json:"name"` + Enabled bool `json:"enabled"` + Version string `json:"version,omitempty"` + AutoEnabled bool `json:"auto_enabled,omitempty"` + Compatible bool `json:"compatible,omitempty"` + Error string `json:"error,omitempty"` +} + +// ConfigurationChange corresponds to the `AppClientConfigurationChange` event +// that contains information about configuration changes since the app-started event +type ConfigurationChange struct { + Configuration []Configuration `json:"configuration"` + RemoteConfig *RemoteConfig `json:"remote_config,omitempty"` +} + +type Origin int + +const ( + OriginDefault Origin = iota + OriginCode + OriginDDConfig + OriginEnvVar + OriginRemoteConfig +) + +func (o Origin) String() string { + switch o { + case OriginDefault: + return "default" + case OriginCode: + return "code" + case OriginDDConfig: + return "dd_config" + case OriginEnvVar: + return "env_var" + case OriginRemoteConfig: + return "remote_config" + default: + return fmt.Sprintf("unknown origin %d", o) + } +} + +func (o Origin) MarshalJSON() ([]byte, error) { + var b bytes.Buffer + b.WriteString(`"`) + b.WriteString(o.String()) + b.WriteString(`"`) + return b.Bytes(), nil +} + +// Configuration is a library-specific configuration value +// that should be initialized through StringConfig, IntConfig, FloatConfig, or BoolConfig +type Configuration struct { + Name string `json:"name"` + Value interface{} `json:"value"` + // origin is the source of the config. It is one of {default, env_var, code, dd_config, remote_config}. + Origin Origin `json:"origin"` + Error Error `json:"error"` + IsOverriden bool `json:"is_overridden"` +} + +// TODO: be able to pass in origin, error, isOverriden info to config +// constructors + +// StringConfig returns a Configuration struct with a string value +func StringConfig(key string, val string) Configuration { + return Configuration{Name: key, Value: val} +} + +// IntConfig returns a Configuration struct with a int value +func IntConfig(key string, val int) Configuration { + return Configuration{Name: key, Value: val} +} + +// FloatConfig returns a Configuration struct with a float value +func FloatConfig(key string, val float64) Configuration { + return Configuration{Name: key, Value: val} +} + +// BoolConfig returns a Configuration struct with a bool value +func BoolConfig(key string, val bool) Configuration { + return Configuration{Name: key, Value: val} +} + +// ProductsPayload is the top-level key for the app-product-change payload. +type ProductsPayload struct { + Products Products `json:"products"` +} + +// Products specifies information about available products. +type Products struct { + AppSec ProductDetails `json:"appsec,omitempty"` + Profiler ProductDetails `json:"profiler,omitempty"` +} + +// ProductDetails specifies details about a product. +type ProductDetails struct { + Enabled bool `json:"enabled"` + Version string `json:"version,omitempty"` + Error Error `json:"error,omitempty"` +} + +// Dependencies stores a list of dependencies +type Dependencies struct { + Dependencies []Dependency `json:"dependencies"` +} + +// Dependency is a Go module on which the application depends. This information +// can be accesed at run-time through the runtime/debug.ReadBuildInfo API. 
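+// A dependency collector could populate this roughly as follows (illustrative sketch, not part of this patch):
+//
+//	if info, ok := debug.ReadBuildInfo(); ok {
+//		for _, mod := range info.Deps {
+//			deps = append(deps, Dependency{Name: mod.Path, Version: mod.Version})
+//		}
+//	}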
+type Dependency struct { + Name string `json:"name"` + Version string `json:"version"` +} + +// RemoteConfig contains information about remote-config +type RemoteConfig struct { + UserEnabled string `json:"user_enabled"` // whether the library has made a request to fetch remote-config + ConfigsRecieved bool `json:"configs_received"` // whether the library receives a valid config response + RcID string `json:"rc_id,omitempty"` + RcRevision string `json:"rc_revision,omitempty"` + RcVersion string `json:"rc_version,omitempty"` + Error Error `json:"error,omitempty"` +} + +// Error stores error information about various tracer events +type Error struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// AdditionalPayload can be used to add extra information to the app-started +// event +type AdditionalPayload struct { + Name string `json:"name"` + Value interface{} `json:"value"` +} + +// Metrics corresponds to the "generate-metrics" request type +type Metrics struct { + Namespace Namespace `json:"namespace"` + Series []Series `json:"series"` +} + +// DistributionMetrics corresponds to the "distributions" request type +type DistributionMetrics struct { + Namespace Namespace `json:"namespace"` + Series []DistributionSeries `json:"series"` +} + +// Series is a sequence of observations for a single named metric. +// The `Points` field will store a timestamp and value. +type Series struct { + Metric string `json:"metric"` + Points [][2]float64 `json:"points"` + // Interval is required only for gauge and rate metrics + Interval int `json:"interval,omitempty"` + // Type cannot be of type distribution because there is a different payload for it + Type MetricType `json:"type,omitempty"` + Tags []string `json:"tags,omitempty"` + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common"` + Namespace Namespace `json:"namespace,omitempty"` +} + +// DistributionSeries is a sequence of observations for a distribution metric. +// Unlike `Series`, DistributionSeries does not store timestamps in `Points` +type DistributionSeries struct { + Metric string `json:"metric"` + Points []float64 `json:"points"` + Tags []string `json:"tags,omitempty"` + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common,omitempty"` +} diff --git a/internal/newtelemetry/payload.go b/internal/newtelemetry/payload.go new file mode 100644 index 0000000000..888eb1fd3d --- /dev/null +++ b/internal/newtelemetry/payload.go @@ -0,0 +1,10 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. 
+ +package newtelemetry + +type Payload interface { + RequestType() RequestType +} diff --git a/internal/newtelemetry/requesy_body_pool.go b/internal/newtelemetry/requesy_body_pool.go new file mode 100644 index 0000000000..352b51ec72 --- /dev/null +++ b/internal/newtelemetry/requesy_body_pool.go @@ -0,0 +1,69 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "runtime" + "sync" + "sync/atomic" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + tracerversion "gopkg.in/DataDog/dd-trace-go.v1/internal/version" +) + +type RequestBodyPool struct { + pool sync.Pool + seqID atomic.Int64 +} + +func NewRequestPool(service, env, version string) *RequestBodyPool { + return &RequestBodyPool{ + pool: sync.Pool{ + New: func() any { + return &Body{ + APIVersion: "v2", + RuntimeID: globalconfig.RuntimeID(), + Application: Application{ + ServiceName: service, + Env: env, + ServiceVersion: version, + TracerVersion: tracerversion.Tag, + LanguageName: "go", + LanguageVersion: runtime.Version(), + }, + Host: Host{ + Hostname: hostname.Get(), + OS: osinfo.OSName(), + OSVersion: osinfo.OSVersion(), + Architecture: osinfo.Architecture(), + KernelName: osinfo.KernelName(), + KernelRelease: osinfo.KernelRelease(), + KernelVersion: osinfo.KernelVersion(), + }, + } + }, + }, + } +} + +// Get returns a new Body from the pool, ready to be turned into JSON and sent to the API. +func (p *RequestBodyPool) Get(payload Payload) *Body { + body := p.pool.Get().(*Body) + body.SeqID = p.seqID.Add(1) + body.TracerTime = time.Now().Unix() + body.Payload = payload + body.RequestType = payload.RequestType() + return body +} + +// Put returns a Body to the pool. +func (p *RequestBodyPool) Put(body *Body) { + body.Payload = nil + p.pool.Put(body) +} diff --git a/internal/newtelemetry/ringbuffer.go b/internal/newtelemetry/ringbuffer.go new file mode 100644 index 0000000000..8366a3de34 --- /dev/null +++ b/internal/newtelemetry/ringbuffer.go @@ -0,0 +1,103 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "sync" +) + +const maxRingBufferSize = 1 << 14 +const startingRingBufferSize = 1 << 8 + +type RingQueue[T any] struct { + // buffer is the slice that contains the data. + buffer []T + // head is the index of the first element in the buffer. + head int + // tail is the index of the last element in the buffer. + tail int + // mu is the lock for the buffer, head and tail. + mu sync.Mutex + // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. + pool sync.Pool +} + +func NewRingQueue[T any]() *RingQueue[T] { + return &RingQueue[T]{ + buffer: make([]T, startingRingBufferSize), + pool: sync.Pool{ + New: func() any { return make([]T, startingRingBufferSize) }, + }, + } +} + +// Enqueue adds a value to the buffer. 
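+// When the buffer fills up, its capacity is doubled until maxRingBufferSize is reached;
+// after that, enqueueing a new value drops the oldest element.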
+func (rb *RingQueue[T]) Enqueue(val T) { + rb.mu.Lock() + defer rb.mu.Unlock() + + rb.buffer[rb.tail] = val + rb.tail = (rb.tail + 1) % len(rb.buffer) + + if rb.tail == rb.head && len(rb.buffer) == maxRingBufferSize { // We loose one element + rb.head = (rb.head + 1) % len(rb.buffer) + // TODO: maybe log that we lost an element + return + } + + // We need to resize the buffer, we double the size, this should happen 10 times before we reach the max size + if rb.tail == rb.head { + newBuffer := make([]T, cap(rb.buffer)*2) + copy(newBuffer, rb.buffer[rb.head:]) + copy(newBuffer[len(rb.buffer)-rb.head:], rb.buffer[:rb.tail]) + rb.head = 0 + rb.tail = len(rb.buffer) - 1 + rb.buffer = newBuffer + } +} + +// Dequeue removes a value from the buffer. +func (rb *RingQueue[T]) Dequeue() T { + rb.mu.Lock() + defer rb.mu.Unlock() + + val := rb.buffer[rb.head] + rb.head = (rb.head + 1) % len(rb.buffer) + return val +} + +// GetBuffer returns the current buffer and resets it. +func (rb *RingQueue[T]) GetBuffer() []T { + rb.mu.Lock() + defer rb.mu.Unlock() + + prevBuf := rb.buffer + rb.buffer = rb.pool.Get().([]T) + rb.head = 0 + rb.tail = len(rb.buffer) - 1 + return prevBuf +} + +// ReleaseBuffer returns the buffer to the pool. +func (rb *RingQueue[T]) ReleaseBuffer(buf []T) { + rb.pool.Put(buf) +} + +// IsEmpty returns true if the buffer is empty. +func (rb *RingQueue[T]) IsEmpty() bool { + rb.mu.Lock() + defer rb.mu.Unlock() + + return rb.head == rb.tail +} + +// IsFull returns true if the buffer is full and cannot accept more elements. +func (rb *RingQueue[T]) IsFull() bool { + rb.mu.Lock() + defer rb.mu.Unlock() + + return (rb.tail+1)%len(rb.buffer) == rb.head && len(rb.buffer) == maxRingBufferSize +} From f398276fe4dbca0c45fa6ffd8cde452e6f8986c1 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 27 Nov 2024 16:19:25 +0100 Subject: [PATCH 02/61] setup newtelemetry package Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 88 ++++++++------ internal/newtelemetry/client.go | 89 +++++++++++++++ internal/newtelemetry/config.go | 14 --- internal/newtelemetry/defaults.go | 1 + internal/newtelemetry/globalclient.go | 89 +++++++++++++++ internal/newtelemetry/hot_pointer.go | 5 + .../newtelemetry/{ => internal}/ringbuffer.go | 2 +- .../newtelemetry/internal/transport/body.go | 52 +++++++++ .../{ => internal/transport}/message.go | 108 ++---------------- internal/newtelemetry/payload.go | 10 -- internal/newtelemetry/requesy_body_pool.go | 69 ----------- internal/newtelemetry/types/namespace.go | 19 +++ internal/newtelemetry/types/origin.go | 46 ++++++++ 13 files changed, 364 insertions(+), 228 deletions(-) create mode 100644 internal/newtelemetry/client.go delete mode 100644 internal/newtelemetry/config.go create mode 100644 internal/newtelemetry/globalclient.go rename internal/newtelemetry/{ => internal}/ringbuffer.go (99%) create mode 100644 internal/newtelemetry/internal/transport/body.go rename internal/newtelemetry/{ => internal/transport}/message.go (75%) delete mode 100644 internal/newtelemetry/payload.go delete mode 100644 internal/newtelemetry/requesy_body_pool.go create mode 100644 internal/newtelemetry/types/namespace.go create mode 100644 internal/newtelemetry/types/origin.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 115a4a5003..ba221232df 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -7,6 +7,8 @@ package newtelemetry import ( "net/http" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) 
type ClientConfig struct { @@ -14,47 +16,36 @@ type ClientConfig struct { // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string - // AgentURL is the url to the agent without the path + // AgentURL is the url to the agent without the path, AgentURL string // APIKey is the API key to use for sending telemetry, defaults to the env var DD_API_KEY. APIKey string - // HTTPClient is the http client to use for sending telemetry, defaults to http.DefaultClient. + // HTTPClient is the http client to use for sending telemetry, defaults to a http.DefaultClient copy. HTTPClient http.RoundTripper } -// NewClient creates a new telemetry client with the given service, environment, and version and config. -func NewClient(service, env, version string, config ClientConfig) (Client, error) { - return nil, nil -} - -// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global client. -func StartApp(client Client) error { - return nil -} - -// StopApp creates the app-stopped telemetry, adding to the queue and flush all the queue before stopping the client. -func StopApp() { -} - +// MetricHandle can be used to submit different values for the same metric. // MetricHandle is used to reduce lock contention when submitting metrics. // This can also be used ephemerally to submit a single metric value like this: // -// telemetry.Metric(telemetry.Appsec, "my-count").Submit(1.0, []string{"tag1:true", "tag2:1.0"}) +// telemetry.Metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) type MetricHandle interface { - Submit(value float64, tags []string) + Submit(value float64) flush() } -// Logger is the interface i -type Logger interface { +// TelemetryLogger is the interface implementing the telemetry logs. Supports log deduplication. All methods are Thread-safe +// This is an interface for easier testing but all functions will be mirrored at the package level to call +// the global client. +type TelemetryLogger interface { // WithTags creates a new Logger which will send a comma-separated list of tags with the next logs - WithTags(tags string) Logger + WithTags(tags string) TelemetryLogger // WithStackTrace creates a new Logger which will send a stack trace generated for each next log. - WithStackTrace(tags string) Logger + WithStackTrace() TelemetryLogger // Error sends a telemetry log at the ERROR level Error(text string) @@ -66,39 +57,60 @@ type Logger interface { Debug(text string) } -// Client constitutes all the functions available concurrently for the telemetry users. +// Integration is an integration that is configured to be traced. +type Integration struct { + Name string + Version string + AutoEnabled bool + Compatible bool + Error string +} + +// Client constitutes all the functions available concurrently for the telemetry users. All methods are thread-safe +// This is an interface for easier testing but all functions will be mirrored at the package level to call +// the global client. type Client interface { - // Count creates a new metric handle for the given namespace and name that can be used to submit values. - Count(namespace Namespace, name string) MetricHandle - // Rate creates a new metric handle for the given namespace and name that can be used to submit values. - Rate(namespace Namespace, name string) MetricHandle + // Count creates a new metric handle for the given parameters that can be used to submit values. 
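+	// For example (illustrative metric name and tags, assuming a client variable):
+	//
+	//	client.Count(types.NamespaceTracers, "spans_created", map[string]string{"integration": "net/http"}).Submit(1)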
+ Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle + + // Rate creates a new metric handle for the given parameters that can be used to submit values. + Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle - // Gauge creates a new metric handle for the given namespace and name that can be used to submit values. - Gauge(namespace Namespace, name string) MetricHandle + // Gauge creates a new metric handle for the given parameters that can be used to submit values. + Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle - // Distribution creates a new metric handle for the given namespace and name that can be used to submit values. - Distribution(namespace Namespace, name string) MetricHandle + // Distribution creates a new metric handle for the given parameters that can be used to submit values. + Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle // Logger returns an implementation of the Logger interface which sends telemetry logs. - Logger() Logger + Logger() TelemetryLogger - // ProductOnOff sent the telemetry necessary to signal that a product is enabled/disabled. - ProductOnOff(product Namespace, enabled bool) + // ProductStarted declares a product to have started at the customer’s request + ProductStarted(product types.Namespace) + + // ProductStopped declares a product to have being stopped by the customer + ProductStopped(product types.Namespace) + + // ProductStartError declares that a product could not start because of the following error + ProductStartError(product types.Namespace, err error) // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. - AddAppConfig(key string, value any, origin Origin) + AddAppConfig(key string, value any, origin types.Origin) - // AddBulkAppConfig adds a list of key value pairs to the app configuration and send the change to telemetry. + // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. - AddBulkAppConfig(kvs []Configuration) + AddBulkAppConfig(kvs map[string]any, origin types.Origin) + + // MarkIntegrationAsLoaded marks an integration as loaded in the telemetry + MarkIntegrationAsLoaded(integration Integration) // flush closes the client and flushes any remaining data. flush() // appStart sends the telemetry necessary to signal that the app is starting. - appStart() + appStart() error // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() appStop() diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go new file mode 100644 index 0000000000..7beb7e6f8e --- /dev/null +++ b/internal/newtelemetry/client.go @@ -0,0 +1,89 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +// NewClient creates a new telemetry client with the given service, environment, and version and config. 
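+// Typical usage could look like this (illustrative sketch; the service, env and version values are examples):
+//
+//	client, err := NewClient("my-service", "prod", "1.2.3", ClientConfig{})
+//	if err != nil {
+//		// handle the error
+//	}
+//	if err := StartApp(client); err != nil {
+//		// handle the error
+//	}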
+func NewClient(service, env, version string, config ClientConfig) (Client, error) { + return nil, nil +} + +type client struct{} + +func (c client) MarkIntegrationAsLoaded(integration Integration) { + //TODO implement me + panic("implement me") +} + +func (c client) Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + //TODO implement me + panic("implement me") +} + +func (c client) Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + //TODO implement me + panic("implement me") +} + +func (c client) Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + //TODO implement me + panic("implement me") +} + +func (c client) Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + //TODO implement me + panic("implement me") +} + +func (c client) Logger() TelemetryLogger { + //TODO implement me + panic("implement me") +} + +func (c client) ProductStarted(product types.Namespace) { + //TODO implement me + panic("implement me") +} + +func (c client) ProductStopped(product types.Namespace) { + //TODO implement me + panic("implement me") +} + +func (c client) ProductStartError(product types.Namespace, err error) { + //TODO implement me + panic("implement me") +} + +func (c client) AddAppConfig(key string, value any, origin types.Origin) { + //TODO implement me + panic("implement me") +} + +func (c client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { + //TODO implement me + panic("implement me") +} + +func (c client) flush() { + //TODO implement me + panic("implement me") +} + +func (c client) appStart() error { + //TODO implement me + panic("implement me") +} + +func (c client) appStop() { + //TODO implement me + panic("implement me") +} + +var _ Client = (*client)(nil) diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go deleted file mode 100644 index f09fd791b9..0000000000 --- a/internal/newtelemetry/config.go +++ /dev/null @@ -1,14 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. - -package newtelemetry - -type InternalConfig struct { - Enabled bool - HeartbeatInterval int - DependencyCollectionEnabled bool - MetricsEnabled bool - LogsEnabled bool -} diff --git a/internal/newtelemetry/defaults.go b/internal/newtelemetry/defaults.go index 498c9d2e9b..58fa3ac11f 100644 --- a/internal/newtelemetry/defaults.go +++ b/internal/newtelemetry/defaults.go @@ -15,6 +15,7 @@ var ( // We copy the transport to avoid using the default one, as it might be // augmented with tracing and we don't want these calls to be recorded. // See https://golang.org/pkg/net/http/#DefaultTransport . + //orchestrion:ignore defaultHTTPClient = &http.Client{ Transport: &http.Transport{ Proxy: http.ProxyFromEnvironment, diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go new file mode 100644 index 0000000000..1811458b5e --- /dev/null +++ b/internal/newtelemetry/globalclient.go @@ -0,0 +1,89 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. 
+ +package newtelemetry + +import ( + "sync/atomic" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +var globalClient atomic.Pointer[Client] + +// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global client. +func StartApp(client Client) error { + if err := client.appStart(); err != nil { + return err + } + SwapClient(client) + return nil +} + +// SwapClient swaps the global client with the given client and flush the old client. +func SwapClient(client Client) { + if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { + (*oldClient).flush() + } +} + +// StopApp creates the app-stopped telemetry, adding to the queue and flush all the queue before stopping the client. +func StopApp() { + if client := globalClient.Swap(nil); client != nil && *client != nil { + (*client).appStop() + (*client).flush() + } +} + +func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return (*globalClient.Load()).Count(namespace, name, tags) +} + +// Rate creates a new metric handle for the given parameters that can be used to submit values. +func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return (*globalClient.Load()).Rate(namespace, name, tags) +} + +// Gauge creates a new metric handle for the given parameters that can be used to submit values. +func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return (*globalClient.Load()).Gauge(namespace, name, tags) +} + +// Distribution creates a new metric handle for the given parameters that can be used to submit values. +func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return (*globalClient.Load()).Distribution(namespace, name, tags) +} + +// Logger returns an implementation of the TelemetryLogger interface which sends telemetry logs. +func Logger() TelemetryLogger { + return (*globalClient.Load()).Logger() +} + +// ProductStarted declares a product to have started at the customer’s request +func ProductStarted(product types.Namespace) { + (*globalClient.Load()).ProductStarted(product) +} + +// ProductStopped declares a product to have being stopped by the customer +func ProductStopped(product types.Namespace) { + (*globalClient.Load()).ProductStopped(product) +} + +// ProductStartError declares that a product could not start because of the following error +func ProductStartError(product types.Namespace, err error) { + (*globalClient.Load()).ProductStartError(product, err) +} + +// AddAppConfig adds a key value pair to the app configuration and send the change to telemetry +// value has to be json serializable and the origin is the source of the change. +func AddAppConfig(key string, value any, origin types.Origin) { + (*globalClient.Load()).AddAppConfig(key, value, origin) +} + +// AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. +// Same as AddAppConfig but for multiple values. 
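+// For example (illustrative keys and values):
+//
+//	AddBulkAppConfig(map[string]any{"trace_enabled": true, "trace_sample_rate": 0.5}, types.OriginEnvVar)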
+func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { + (*globalClient.Load()).AddBulkAppConfig(kvs, origin) +} diff --git a/internal/newtelemetry/hot_pointer.go b/internal/newtelemetry/hot_pointer.go index b8383f45ac..f2d52be5aa 100644 --- a/internal/newtelemetry/hot_pointer.go +++ b/internal/newtelemetry/hot_pointer.go @@ -27,6 +27,11 @@ func (hp *hotPointer[T]) Lock() *T { return hp.value } +// Unlock unlocks the lock +func (hp *hotPointer[T]) Unlock() { + hp.writeMu.Unlock() +} + // StandbyValue returns the standby value WITHOUT locking. Which means it cannot be used concurrently. func (hp *hotPointer[T]) StandbyValue() *T { return hp.standby diff --git a/internal/newtelemetry/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go similarity index 99% rename from internal/newtelemetry/ringbuffer.go rename to internal/newtelemetry/internal/ringbuffer.go index 8366a3de34..f8af3c48e3 100644 --- a/internal/newtelemetry/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2024 Datadog, Inc. -package newtelemetry +package internal import ( "sync" diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go new file mode 100644 index 0000000000..761db0555d --- /dev/null +++ b/internal/newtelemetry/internal/transport/body.go @@ -0,0 +1,52 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package transport + +import ( + "runtime" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + tracerversion "gopkg.in/DataDog/dd-trace-go.v1/internal/version" +) + +// Body is the common high-level structure encapsulating a telemetry request body +type Body struct { + APIVersion string `json:"api_version"` + RequestType RequestType `json:"request_type"` + TracerTime int64 `json:"tracer_time"` + RuntimeID string `json:"runtime_id"` + SeqID int64 `json:"seq_id"` + Debug bool `json:"debug"` + Payload interface{} `json:"payload"` + Application Application `json:"application"` + Host Host `json:"host"` +} + +func NewBody(service, env, version string) *Body { + return &Body{ + APIVersion: "v2", + RuntimeID: globalconfig.RuntimeID(), + Application: Application{ + ServiceName: service, + Env: env, + ServiceVersion: version, + TracerVersion: tracerversion.Tag, + LanguageName: "go", + LanguageVersion: runtime.Version(), + }, + Host: Host{ + Hostname: hostname.Get(), + OS: osinfo.OSName(), + OSVersion: osinfo.OSVersion(), + Architecture: osinfo.Architecture(), + KernelName: osinfo.KernelName(), + KernelRelease: osinfo.KernelRelease(), + KernelVersion: osinfo.KernelVersion(), + }, + } +} diff --git a/internal/newtelemetry/message.go b/internal/newtelemetry/internal/transport/message.go similarity index 75% rename from internal/newtelemetry/message.go rename to internal/newtelemetry/internal/transport/message.go index e8c5f12148..d7ec1ce833 100644 --- a/internal/newtelemetry/message.go +++ b/internal/newtelemetry/internal/transport/message.go @@ -1,14 +1,14 @@ // Unless explicitly stated otherwise all files in this repository are licensed // under the Apache License Version 2.0. 
// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2022 Datadog, Inc. +// Copyright 2024 Datadog, Inc. -package newtelemetry +package transport import ( - "bytes" - "fmt" "net/http" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) // Request captures all necessary information for a telemetry event submission @@ -19,19 +19,6 @@ type Request struct { URL string } -// Body is the common high-level structure encapsulating a telemetry request body -type Body struct { - APIVersion string `json:"api_version"` - RequestType RequestType `json:"request_type"` - TracerTime int64 `json:"tracer_time"` - RuntimeID string `json:"runtime_id"` - SeqID int64 `json:"seq_id"` - Debug bool `json:"debug"` - Payload interface{} `json:"payload"` - Application Application `json:"application"` - Host Host `json:"host"` -} - // RequestType determines how the Payload of a request should be handled type RequestType string @@ -63,19 +50,6 @@ const ( RequestTypeAppIntegrationsChange RequestType = "app-integrations-change" ) -// Namespace describes an APM product to distinguish telemetry coming from -// different products used by the same application -type Namespace string - -const ( - NamespaceGeneral Namespace = "general" - NamespaceTracers Namespace = "tracers" - NamespaceProfilers Namespace = "profilers" - NamespaceAppSec Namespace = "appsec" - NamespaceIAST Namespace = "iast" - NamespaceTelemetry Namespace = "telemetry" -) - // MetricType is the type of metric being sent, either count, gauge, rate or distribution type MetricType string @@ -139,73 +113,15 @@ type ConfigurationChange struct { RemoteConfig *RemoteConfig `json:"remote_config,omitempty"` } -type Origin int - -const ( - OriginDefault Origin = iota - OriginCode - OriginDDConfig - OriginEnvVar - OriginRemoteConfig -) - -func (o Origin) String() string { - switch o { - case OriginDefault: - return "default" - case OriginCode: - return "code" - case OriginDDConfig: - return "dd_config" - case OriginEnvVar: - return "env_var" - case OriginRemoteConfig: - return "remote_config" - default: - return fmt.Sprintf("unknown origin %d", o) - } -} - -func (o Origin) MarshalJSON() ([]byte, error) { - var b bytes.Buffer - b.WriteString(`"`) - b.WriteString(o.String()) - b.WriteString(`"`) - return b.Bytes(), nil -} - // Configuration is a library-specific configuration value // that should be initialized through StringConfig, IntConfig, FloatConfig, or BoolConfig type Configuration struct { Name string `json:"name"` Value interface{} `json:"value"` // origin is the source of the config. It is one of {default, env_var, code, dd_config, remote_config}. 
- Origin Origin `json:"origin"` - Error Error `json:"error"` - IsOverriden bool `json:"is_overridden"` -} - -// TODO: be able to pass in origin, error, isOverriden info to config -// constructors - -// StringConfig returns a Configuration struct with a string value -func StringConfig(key string, val string) Configuration { - return Configuration{Name: key, Value: val} -} - -// IntConfig returns a Configuration struct with a int value -func IntConfig(key string, val int) Configuration { - return Configuration{Name: key, Value: val} -} - -// FloatConfig returns a Configuration struct with a float value -func FloatConfig(key string, val float64) Configuration { - return Configuration{Name: key, Value: val} -} - -// BoolConfig returns a Configuration struct with a bool value -func BoolConfig(key string, val bool) Configuration { - return Configuration{Name: key, Value: val} + Origin types.Origin `json:"origin"` + Error Error `json:"error"` + IsOverriden bool `json:"is_overridden"` } // ProductsPayload is the top-level key for the app-product-change payload. @@ -263,13 +179,13 @@ type AdditionalPayload struct { // Metrics corresponds to the "generate-metrics" request type type Metrics struct { - Namespace Namespace `json:"namespace"` - Series []Series `json:"series"` + Namespace types.Namespace `json:"namespace"` + Series []Series `json:"series"` } // DistributionMetrics corresponds to the "distributions" request type type DistributionMetrics struct { - Namespace Namespace `json:"namespace"` + Namespace types.Namespace `json:"namespace"` Series []DistributionSeries `json:"series"` } @@ -289,8 +205,8 @@ type Series struct { // NOTE: If this field isn't present in the request, the API assumes // the metric is common. So we can't "omitempty" even though the // field is technically optional. - Common bool `json:"common"` - Namespace Namespace `json:"namespace,omitempty"` + Common bool `json:"common"` + Namespace types.Namespace `json:"namespace,omitempty"` } // DistributionSeries is a sequence of observations for a distribution metric. diff --git a/internal/newtelemetry/payload.go b/internal/newtelemetry/payload.go deleted file mode 100644 index 888eb1fd3d..0000000000 --- a/internal/newtelemetry/payload.go +++ /dev/null @@ -1,10 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. - -package newtelemetry - -type Payload interface { - RequestType() RequestType -} diff --git a/internal/newtelemetry/requesy_body_pool.go b/internal/newtelemetry/requesy_body_pool.go deleted file mode 100644 index 352b51ec72..0000000000 --- a/internal/newtelemetry/requesy_body_pool.go +++ /dev/null @@ -1,69 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. 
- -package newtelemetry - -import ( - "runtime" - "sync" - "sync/atomic" - "time" - - "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" - "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" - "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" - tracerversion "gopkg.in/DataDog/dd-trace-go.v1/internal/version" -) - -type RequestBodyPool struct { - pool sync.Pool - seqID atomic.Int64 -} - -func NewRequestPool(service, env, version string) *RequestBodyPool { - return &RequestBodyPool{ - pool: sync.Pool{ - New: func() any { - return &Body{ - APIVersion: "v2", - RuntimeID: globalconfig.RuntimeID(), - Application: Application{ - ServiceName: service, - Env: env, - ServiceVersion: version, - TracerVersion: tracerversion.Tag, - LanguageName: "go", - LanguageVersion: runtime.Version(), - }, - Host: Host{ - Hostname: hostname.Get(), - OS: osinfo.OSName(), - OSVersion: osinfo.OSVersion(), - Architecture: osinfo.Architecture(), - KernelName: osinfo.KernelName(), - KernelRelease: osinfo.KernelRelease(), - KernelVersion: osinfo.KernelVersion(), - }, - } - }, - }, - } -} - -// Get returns a new Body from the pool, ready to be turned into JSON and sent to the API. -func (p *RequestBodyPool) Get(payload Payload) *Body { - body := p.pool.Get().(*Body) - body.SeqID = p.seqID.Add(1) - body.TracerTime = time.Now().Unix() - body.Payload = payload - body.RequestType = payload.RequestType() - return body -} - -// Put returns a Body to the pool. -func (p *RequestBodyPool) Put(body *Body) { - body.Payload = nil - p.pool.Put(body) -} diff --git a/internal/newtelemetry/types/namespace.go b/internal/newtelemetry/types/namespace.go new file mode 100644 index 0000000000..c86453ecea --- /dev/null +++ b/internal/newtelemetry/types/namespace.go @@ -0,0 +1,19 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package types + +// Namespace describes an APM product to distinguish telemetry coming from +// different products used by the same application +type Namespace string + +const ( + NamespaceGeneral Namespace = "general" + NamespaceTracers Namespace = "tracers" + NamespaceProfilers Namespace = "profilers" + NamespaceAppSec Namespace = "appsec" + NamespaceIAST Namespace = "iast" + NamespaceTelemetry Namespace = "telemetry" +) diff --git a/internal/newtelemetry/types/origin.go b/internal/newtelemetry/types/origin.go new file mode 100644 index 0000000000..9d45a10ab9 --- /dev/null +++ b/internal/newtelemetry/types/origin.go @@ -0,0 +1,46 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. 
+ +package types + +import ( + "bytes" + "fmt" +) + +type Origin int + +const ( + OriginDefault Origin = iota + OriginCode + OriginDDConfig + OriginEnvVar + OriginRemoteConfig +) + +func (o Origin) String() string { + switch o { + case OriginDefault: + return "default" + case OriginCode: + return "code" + case OriginDDConfig: + return "dd_config" + case OriginEnvVar: + return "env_var" + case OriginRemoteConfig: + return "remote_config" + default: + return fmt.Sprintf("unknown origin %d", o) + } +} + +func (o Origin) MarshalJSON() ([]byte, error) { + var b bytes.Buffer + b.WriteString(`"`) + b.WriteString(o.String()) + b.WriteString(`"`) + return b.Bytes(), nil +} From 34d2cf8c702dc7ab188feeabe396312a21a4c390 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 13 Jan 2025 17:42:48 +0100 Subject: [PATCH 03/61] internal/newtelemetry: create all structs from schemas Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 3 +- internal/newtelemetry/hot_pointer.go | 47 ---- .../internal/transport/app_closing.go | 12 + .../transport/app_configuration_change.go | 14 ++ .../transport/app_dependencies_loaded.go | 21 ++ .../transport/app_extended_heartbeat.go | 16 ++ .../internal/transport/app_heartbeat.go | 16 ++ .../transport/app_integration_change.go | 24 ++ .../internal/transport/app_product_change.go | 26 ++ .../internal/transport/app_started.go | 37 +++ .../newtelemetry/internal/transport/body.go | 24 +- .../internal/transport/conf_key_value.go | 21 ++ .../internal/transport/distributions.go | 40 ++++ .../newtelemetry/internal/transport/error.go | 12 + .../internal/transport/generate-metrics.go | 55 +++++ .../newtelemetry/internal/transport/logs.go | 31 +++ .../internal/transport/message.go | 225 ------------------ .../internal/transport/message_batch.go | 19 ++ .../internal/transport/payload.go | 13 + .../internal/transport/requesttype.go | 55 +++++ 20 files changed, 437 insertions(+), 274 deletions(-) delete mode 100644 internal/newtelemetry/hot_pointer.go create mode 100644 internal/newtelemetry/internal/transport/app_closing.go create mode 100644 internal/newtelemetry/internal/transport/app_configuration_change.go create mode 100644 internal/newtelemetry/internal/transport/app_dependencies_loaded.go create mode 100644 internal/newtelemetry/internal/transport/app_extended_heartbeat.go create mode 100644 internal/newtelemetry/internal/transport/app_heartbeat.go create mode 100644 internal/newtelemetry/internal/transport/app_integration_change.go create mode 100644 internal/newtelemetry/internal/transport/app_product_change.go create mode 100644 internal/newtelemetry/internal/transport/app_started.go create mode 100644 internal/newtelemetry/internal/transport/conf_key_value.go create mode 100644 internal/newtelemetry/internal/transport/distributions.go create mode 100644 internal/newtelemetry/internal/transport/error.go create mode 100644 internal/newtelemetry/internal/transport/generate-metrics.go create mode 100644 internal/newtelemetry/internal/transport/logs.go delete mode 100644 internal/newtelemetry/internal/transport/message.go create mode 100644 internal/newtelemetry/internal/transport/message_batch.go create mode 100644 internal/newtelemetry/internal/transport/payload.go create mode 100644 internal/newtelemetry/internal/transport/requesttype.go diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 7beb7e6f8e..d4dd507871 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -14,7 +14,8 @@ func 
NewClient(service, env, version string, config ClientConfig) (Client, error return nil, nil } -type client struct{} +type client struct { +} func (c client) MarkIntegrationAsLoaded(integration Integration) { //TODO implement me diff --git a/internal/newtelemetry/hot_pointer.go b/internal/newtelemetry/hot_pointer.go deleted file mode 100644 index f2d52be5aa..0000000000 --- a/internal/newtelemetry/hot_pointer.go +++ /dev/null @@ -1,47 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. - -package newtelemetry - -import ( - "sync" -) - -// hotPointer is a type that allows for atomic swapping of a value while keeping the other on standby to prevent allocations. -type hotPointer[T any] struct { - // value is the current value that can be extracted. - value *T - // standby is the value that will be swapped in. - standby *T - // writeMu is used to lock the value. - writeMu sync.Mutex - // swapMu is used to lock the swap. - swapMu sync.Mutex -} - -// Lock take the lock and return the current value -func (hp *hotPointer[T]) Lock() *T { - hp.writeMu.Lock() - return hp.value -} - -// Unlock unlocks the lock -func (hp *hotPointer[T]) Unlock() { - hp.writeMu.Unlock() -} - -// StandbyValue returns the standby value WITHOUT locking. Which means it cannot be used concurrently. -func (hp *hotPointer[T]) StandbyValue() *T { - return hp.standby -} - -// Swap swaps the current value with the standby value and return the standby value using the lock. -// the value returned is NOT protected by the lock. -func (hp *hotPointer[T]) Swap() *T { - hp.Lock() - defer hp.Unlock() - hp.value, hp.standby = hp.standby, hp.value - return hp.standby -} diff --git a/internal/newtelemetry/internal/transport/app_closing.go b/internal/newtelemetry/internal/transport/app_closing.go new file mode 100644 index 0000000000..11516d34e5 --- /dev/null +++ b/internal/newtelemetry/internal/transport/app_closing.go @@ -0,0 +1,12 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +type AppClosing struct{} + +func (AppClosing) RequestType() RequestType { + return RequestTypeAppClosing +} diff --git a/internal/newtelemetry/internal/transport/app_configuration_change.go b/internal/newtelemetry/internal/transport/app_configuration_change.go new file mode 100644 index 0000000000..d6ef75de7a --- /dev/null +++ b/internal/newtelemetry/internal/transport/app_configuration_change.go @@ -0,0 +1,14 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+
+package transport
+
+type AppClientConfigurationChange struct {
+	Configuration []ConfKeyValue `json:"configuration"`
+}
+
+func (AppClientConfigurationChange) RequestType() RequestType {
+	return RequestTypeAppClientConfigurationChange
+}
diff --git a/internal/newtelemetry/internal/transport/app_dependencies_loaded.go b/internal/newtelemetry/internal/transport/app_dependencies_loaded.go
new file mode 100644
index 0000000000..425b01bb9a
--- /dev/null
+++ b/internal/newtelemetry/internal/transport/app_dependencies_loaded.go
@@ -0,0 +1,21 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package transport
+
+type AppDependenciesLoaded struct {
+	Dependencies []Dependency `json:"dependencies"`
+}
+
+func (AppDependenciesLoaded) RequestType() RequestType {
+	return RequestTypeAppDependenciesLoaded
+}
+
+// Dependency is a Go module on which the application depends. This information
+// can be accessed at run-time through the runtime/debug.ReadBuildInfo API.
+type Dependency struct {
+	Name    string `json:"name"`
+	Version string `json:"version,omitempty"`
+}
diff --git a/internal/newtelemetry/internal/transport/app_extended_heartbeat.go b/internal/newtelemetry/internal/transport/app_extended_heartbeat.go
new file mode 100644
index 0000000000..62f0e5fed1
--- /dev/null
+++ b/internal/newtelemetry/internal/transport/app_extended_heartbeat.go
@@ -0,0 +1,16 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package transport
+
+type AppExtendedHeartbeat struct {
+	Configuration []ConfKeyValue `json:"configuration,omitempty"`
+	Dependencies  []Dependency   `json:"dependencies,omitempty"`
+	Integrations  []Integration  `json:"integrations,omitempty"`
+}
+
+func (AppExtendedHeartbeat) RequestType() RequestType {
+	return RequestTypeAppExtendedHeartBeat
+}
diff --git a/internal/newtelemetry/internal/transport/app_heartbeat.go b/internal/newtelemetry/internal/transport/app_heartbeat.go
new file mode 100644
index 0000000000..f2d4bab392
--- /dev/null
+++ b/internal/newtelemetry/internal/transport/app_heartbeat.go
@@ -0,0 +1,16 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package transport
+
+// All objects in this file are used to define the payload of the requests sent
+// to the telemetry API.
+// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas
+
+type AppHeartbeat struct{}
+
+func (AppHeartbeat) RequestType() RequestType {
+	return RequestTypeAppHeartbeat
+}
diff --git a/internal/newtelemetry/internal/transport/app_integration_change.go b/internal/newtelemetry/internal/transport/app_integration_change.go
new file mode 100644
index 0000000000..20244f033c
--- /dev/null
+++ b/internal/newtelemetry/internal/transport/app_integration_change.go
@@ -0,0 +1,24 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +type AppIntegrationChange struct { + Integrations []Integration `json:"integrations"` +} + +func (AppIntegrationChange) RequestType() RequestType { + return RequestTypeAppIntegrationsChange +} + +// Integration is an integration that is configured to be traced automatically. +type Integration struct { + Name string `json:"name"` + Enabled bool `json:"enabled"` + Version string `json:"version,omitempty"` + AutoEnabled bool `json:"auto_enabled,omitempty"` + Compatible bool `json:"compatible,omitempty"` + Error string `json:"error,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/app_product_change.go b/internal/newtelemetry/internal/transport/app_product_change.go new file mode 100644 index 0000000000..f52893a96a --- /dev/null +++ b/internal/newtelemetry/internal/transport/app_product_change.go @@ -0,0 +1,26 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +type AppProductChange struct { + Products Products `json:"products"` +} + +type Products map[types.Namespace]Product + +func (AppProductChange) RequestType() RequestType { + return RequestTypeAppProductChange +} + +type Product struct { + Version string `json:"version,omitempty"` + Enabled bool `json:"enabled"` + Error Error `json:"error,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/app_started.go b/internal/newtelemetry/internal/transport/app_started.go new file mode 100644 index 0000000000..3753d78926 --- /dev/null +++ b/internal/newtelemetry/internal/transport/app_started.go @@ -0,0 +1,37 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// All objects in this file are used to define the payload of the requests sent +// to the telemetry API. +// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas + +type AppStarted struct { + Products Products `json:"products,omitempty"` + Configuration []ConfKeyValue `json:"configuration,omitempty"` + Error Error `json:"error,omitempty"` + InstallSignature InstallSignature `json:"install_signature,omitempty"` + AdditionalPayload []AdditionalPayload `json:"additional_payload,omitempty"` +} + +func (AppStarted) RequestType() RequestType { + return RequestTypeAppStarted +} + +// InstallSignature is a structure to send the install signature with the +// AppStarted payload. +type InstallSignature struct { + InstallID string `json:"install_id,omitempty"` + InstallType string `json:"install_type,omitempty"` + InstallTime string `json:"install_time,omitempty"` +} + +// AdditionalPayload is a generic structure to send additional data with the +// AppStarted payload. 
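+// Judging from the field types alone, Name is expected to carry the
+// RequestType() of the embedded Value so the backend can decode it like a
+// stand-alone payload; see the schema link at the top of this file for the
+// authoritative description.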
+type AdditionalPayload struct { + Name RequestType `json:"name"` + Value Payload `json:"value"` +} diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index 761db0555d..b420b1a3bd 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -14,6 +14,28 @@ import ( tracerversion "gopkg.in/DataDog/dd-trace-go.v1/internal/version" ) +// Application is identifying information about the app itself +type Application struct { + ServiceName string `json:"service_name"` + Env string `json:"env"` + ServiceVersion string `json:"service_version"` + TracerVersion string `json:"tracer_version"` + LanguageName string `json:"language_name"` + LanguageVersion string `json:"language_version"` +} + +// Host is identifying information about the host on which the app +// is running +type Host struct { + Hostname string `json:"hostname"` + OS string `json:"os"` + OSVersion string `json:"os_version,omitempty"` + Architecture string `json:"architecture"` + KernelName string `json:"kernel_name"` + KernelRelease string `json:"kernel_release"` + KernelVersion string `json:"kernel_version"` +} + // Body is the common high-level structure encapsulating a telemetry request body type Body struct { APIVersion string `json:"api_version"` @@ -22,7 +44,7 @@ type Body struct { RuntimeID string `json:"runtime_id"` SeqID int64 `json:"seq_id"` Debug bool `json:"debug"` - Payload interface{} `json:"payload"` + Payload Payload `json:"payload"` Application Application `json:"application"` Host Host `json:"host"` } diff --git a/internal/newtelemetry/internal/transport/conf_key_value.go b/internal/newtelemetry/internal/transport/conf_key_value.go new file mode 100644 index 0000000000..deab25d2f3 --- /dev/null +++ b/internal/newtelemetry/internal/transport/conf_key_value.go @@ -0,0 +1,21 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +// ConfKeyValue is a library-specific configuration value +type ConfKeyValue struct { + Name string `json:"name"` + Value any `json:"value"` // can be any type of integer, float, string, or boolean + Origin types.Origin `json:"origin"` + Error Error `json:"error,omitempty"` + + // SeqID is used to track the total number of configuration key value pairs applied across the tracer + SeqID uint64 `json:"seq_id,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/distributions.go b/internal/newtelemetry/internal/transport/distributions.go new file mode 100644 index 0000000000..2cac29b4d6 --- /dev/null +++ b/internal/newtelemetry/internal/transport/distributions.go @@ -0,0 +1,40 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +// All objects in this file are used to define the payload of the requests sent +// to the telemetry API. 
+// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas + +type Distributions struct { + Namespace types.Namespace `json:"namespace"` + Series []DistributionSeries `json:"series"` + SkipAllowlist bool `json:"skip_allowlist,omitempty"` +} + +func (Distributions) RequestType() RequestType { + return RequestTypeDistributions +} + +// DistributionSeries is a sequence of observations for a distribution metric. +// Unlike `MetricData`, DistributionSeries does not store timestamps in `Points` +type DistributionSeries struct { + Metric string `json:"metric"` + Points []float64 `json:"points"` + Tags []string `json:"tags,omitempty"` + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common,omitempty"` + Namespace types.Namespace `json:"namespace,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/error.go b/internal/newtelemetry/internal/transport/error.go new file mode 100644 index 0000000000..b0de67be42 --- /dev/null +++ b/internal/newtelemetry/internal/transport/error.go @@ -0,0 +1,12 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// Error stores error information about various tracer events +type Error struct { + Code int `json:"code"` + Message string `json:"message"` +} diff --git a/internal/newtelemetry/internal/transport/generate-metrics.go b/internal/newtelemetry/internal/transport/generate-metrics.go new file mode 100644 index 0000000000..344f483ff5 --- /dev/null +++ b/internal/newtelemetry/internal/transport/generate-metrics.go @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +// All objects in this file are used to define the payload of the requests sent +// to the telemetry API. +// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas + +type GenerateMetrics struct { + Namespace types.Namespace `json:"namespace,omitempty"` + Series []MetricData `json:"series"` + SkipAllowlist bool `json:"skip_allowlist,omitempty"` +} + +func (GenerateMetrics) RequestType() RequestType { + return RequestTypeGenerateMetrics +} + +// MetricType is the type of metric being sent, either count, gauge, or rate +// distribution is another payload altogether +type MetricType string + +const ( + RateMetric MetricType = "rate" + CountMetric MetricType = "count" + GaugeMetric MetricType = "gauge" +) + +// MetricData is a sequence of observations for a single named metric. 
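+//
+// A minimal, hypothetical example of how a count series could be built (the
+// metric name, tag, and values are made up; the first element of each point
+// is the timestamp, assumed to be Unix seconds, and the second the observed
+// value):
+//
+//	series := MetricData{
+//		Metric: "example.submissions",
+//		Type:   CountMetric,
+//		Points: [][2]float64{{1700000000, 3}, {1700000010, 1}},
+//		Tags:   []string{"origin:example"},
+//	}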
+type MetricData struct { + Metric string `json:"metric"` + // Points stores pairs of timestamps and values + Points [][2]float64 `json:"points"` + // Interval is required only for gauge and rate metrics + Interval int `json:"interval,omitempty"` + // Type cannot be of type distribution because there is a different payload for it + Type MetricType `json:"type,omitempty"` + Tags []string `json:"tags,omitempty"` + + // Common distinguishes metrics which are cross-language vs. + // language-specific. + // + // NOTE: If this field isn't present in the request, the API assumes + // the metric is common. So we can't "omitempty" even though the + // field is technically optional. + Common bool `json:"common"` + Namespace types.Namespace `json:"namespace,omitempty"` +} diff --git a/internal/newtelemetry/internal/transport/logs.go b/internal/newtelemetry/internal/transport/logs.go new file mode 100644 index 0000000000..e945b9f155 --- /dev/null +++ b/internal/newtelemetry/internal/transport/logs.go @@ -0,0 +1,31 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +type Logs struct { + Logs []LogMessage `json:"logs,omitempty"` +} + +func (Logs) RequestType() RequestType { + return RequestTypeLogs +} + +type LogLevel string + +const ( + LogLevelDebug LogLevel = "DEBUG" + LogLevelWarn LogLevel = "WARN" + LogLevelError LogLevel = "ERROR" +) + +type LogMessage struct { + Message string `json:"message"` + Level LogLevel `json:"level"` + Count int `json:"count,omitempty"` + Tags string `json:"tags,omitempty"` // comma separated list of tags, e.g. "tag1:1,tag2:toto" + StackTrace string `json:"stack_trace,omitempty"` + TracerTime int `json:"tracer_time,omitempty"` // Unix timestamp in seconds +} diff --git a/internal/newtelemetry/internal/transport/message.go b/internal/newtelemetry/internal/transport/message.go deleted file mode 100644 index d7ec1ce833..0000000000 --- a/internal/newtelemetry/internal/transport/message.go +++ /dev/null @@ -1,225 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. 
- -package transport - -import ( - "net/http" - - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" -) - -// Request captures all necessary information for a telemetry event submission -type Request struct { - Body *Body - Header *http.Header - HTTPClient *http.Client - URL string -} - -// RequestType determines how the Payload of a request should be handled -type RequestType string - -const ( - // RequestTypeAppStarted is the first message sent by the telemetry - // client, containing the configuration loaded at startup - RequestTypeAppStarted RequestType = "app-started" - // RequestTypeAppHeartbeat is sent periodically by the client to indicate - // that the app is still running - RequestTypeAppHeartbeat RequestType = "app-heartbeat" - // RequestTypeGenerateMetrics contains count, gauge, or rate metrics accumulated by the - // client, and is sent periodically along with the heartbeat - RequestTypeGenerateMetrics RequestType = "generate-metrics" - // RequestTypeDistributions is to send distribution type metrics accumulated by the - // client, and is sent periodically along with the heartbeat - RequestTypeDistributions RequestType = "distributions" - // RequestTypeAppClosing is sent when the telemetry client is stopped - RequestTypeAppClosing RequestType = "app-closing" - // RequestTypeDependenciesLoaded is sent if DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED - // is enabled. Sent when Start is called for the telemetry client. - RequestTypeDependenciesLoaded RequestType = "app-dependencies-loaded" - // RequestTypeAppClientConfigurationChange is sent if there are changes - // to the client library configuration - RequestTypeAppClientConfigurationChange RequestType = "app-client-configuration-change" - // RequestTypeAppProductChange is sent when products are enabled/disabled - RequestTypeAppProductChange RequestType = "app-product-change" - // RequestTypeAppIntegrationsChange is sent when the telemetry client starts - // with info on which integrations are used. 
- RequestTypeAppIntegrationsChange RequestType = "app-integrations-change" -) - -// MetricType is the type of metric being sent, either count, gauge, rate or distribution -type MetricType string - -const ( - RateMetric MetricType = "rate" - CountMetric MetricType = "count" - GaugeMetric MetricType = "gauge" - DistributionMetric MetricType = "distribution" -) - -// Application is identifying information about the app itself -type Application struct { - ServiceName string `json:"service_name"` - Env string `json:"env"` - ServiceVersion string `json:"service_version"` - TracerVersion string `json:"tracer_version"` - LanguageName string `json:"language_name"` - LanguageVersion string `json:"language_version"` -} - -// Host is identifying information about the host on which the app -// is running -type Host struct { - Hostname string `json:"hostname"` - OS string `json:"os"` - OSVersion string `json:"os_version,omitempty"` - Architecture string `json:"architecture"` - KernelName string `json:"kernel_name"` - KernelRelease string `json:"kernel_release"` - KernelVersion string `json:"kernel_version"` -} - -// AppStarted corresponds to the "app-started" request type -type AppStarted struct { - Configuration []Configuration `json:"configuration,omitempty"` - Products Products `json:"products,omitempty"` - AdditionalPayload []AdditionalPayload `json:"additional_payload,omitempty"` - Error Error `json:"error,omitempty"` - RemoteConfig *RemoteConfig `json:"remote_config,omitempty"` -} - -// IntegrationsChange corresponds to the app-integrations-change requesty type -type IntegrationsChange struct { - Integrations []Integration `json:"integrations"` -} - -// Integration is an integration that is configured to be traced automatically. -type Integration struct { - Name string `json:"name"` - Enabled bool `json:"enabled"` - Version string `json:"version,omitempty"` - AutoEnabled bool `json:"auto_enabled,omitempty"` - Compatible bool `json:"compatible,omitempty"` - Error string `json:"error,omitempty"` -} - -// ConfigurationChange corresponds to the `AppClientConfigurationChange` event -// that contains information about configuration changes since the app-started event -type ConfigurationChange struct { - Configuration []Configuration `json:"configuration"` - RemoteConfig *RemoteConfig `json:"remote_config,omitempty"` -} - -// Configuration is a library-specific configuration value -// that should be initialized through StringConfig, IntConfig, FloatConfig, or BoolConfig -type Configuration struct { - Name string `json:"name"` - Value interface{} `json:"value"` - // origin is the source of the config. It is one of {default, env_var, code, dd_config, remote_config}. - Origin types.Origin `json:"origin"` - Error Error `json:"error"` - IsOverriden bool `json:"is_overridden"` -} - -// ProductsPayload is the top-level key for the app-product-change payload. -type ProductsPayload struct { - Products Products `json:"products"` -} - -// Products specifies information about available products. -type Products struct { - AppSec ProductDetails `json:"appsec,omitempty"` - Profiler ProductDetails `json:"profiler,omitempty"` -} - -// ProductDetails specifies details about a product. -type ProductDetails struct { - Enabled bool `json:"enabled"` - Version string `json:"version,omitempty"` - Error Error `json:"error,omitempty"` -} - -// Dependencies stores a list of dependencies -type Dependencies struct { - Dependencies []Dependency `json:"dependencies"` -} - -// Dependency is a Go module on which the application depends. 
This information -// can be accesed at run-time through the runtime/debug.ReadBuildInfo API. -type Dependency struct { - Name string `json:"name"` - Version string `json:"version"` -} - -// RemoteConfig contains information about remote-config -type RemoteConfig struct { - UserEnabled string `json:"user_enabled"` // whether the library has made a request to fetch remote-config - ConfigsRecieved bool `json:"configs_received"` // whether the library receives a valid config response - RcID string `json:"rc_id,omitempty"` - RcRevision string `json:"rc_revision,omitempty"` - RcVersion string `json:"rc_version,omitempty"` - Error Error `json:"error,omitempty"` -} - -// Error stores error information about various tracer events -type Error struct { - Code int `json:"code"` - Message string `json:"message"` -} - -// AdditionalPayload can be used to add extra information to the app-started -// event -type AdditionalPayload struct { - Name string `json:"name"` - Value interface{} `json:"value"` -} - -// Metrics corresponds to the "generate-metrics" request type -type Metrics struct { - Namespace types.Namespace `json:"namespace"` - Series []Series `json:"series"` -} - -// DistributionMetrics corresponds to the "distributions" request type -type DistributionMetrics struct { - Namespace types.Namespace `json:"namespace"` - Series []DistributionSeries `json:"series"` -} - -// Series is a sequence of observations for a single named metric. -// The `Points` field will store a timestamp and value. -type Series struct { - Metric string `json:"metric"` - Points [][2]float64 `json:"points"` - // Interval is required only for gauge and rate metrics - Interval int `json:"interval,omitempty"` - // Type cannot be of type distribution because there is a different payload for it - Type MetricType `json:"type,omitempty"` - Tags []string `json:"tags,omitempty"` - // Common distinguishes metrics which are cross-language vs. - // language-specific. - // - // NOTE: If this field isn't present in the request, the API assumes - // the metric is common. So we can't "omitempty" even though the - // field is technically optional. - Common bool `json:"common"` - Namespace types.Namespace `json:"namespace,omitempty"` -} - -// DistributionSeries is a sequence of observations for a distribution metric. -// Unlike `Series`, DistributionSeries does not store timestamps in `Points` -type DistributionSeries struct { - Metric string `json:"metric"` - Points []float64 `json:"points"` - Tags []string `json:"tags,omitempty"` - // Common distinguishes metrics which are cross-language vs. - // language-specific. - // - // NOTE: If this field isn't present in the request, the API assumes - // the metric is common. So we can't "omitempty" even though the - // field is technically optional. - Common bool `json:"common,omitempty"` -} diff --git a/internal/newtelemetry/internal/transport/message_batch.go b/internal/newtelemetry/internal/transport/message_batch.go new file mode 100644 index 0000000000..00369c628b --- /dev/null +++ b/internal/newtelemetry/internal/transport/message_batch.go @@ -0,0 +1,19 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package transport + +type MessageBatch struct { + Payload []Message `json:"payload"` +} + +func (MessageBatch) RequestType() RequestType { + return RequestTypeMessageBatch +} + +type Message struct { + RequestType RequestType `json:"request_type"` + Payload Payload `json:"payload"` +} diff --git a/internal/newtelemetry/internal/transport/payload.go b/internal/newtelemetry/internal/transport/payload.go new file mode 100644 index 0000000000..1a4c0a1dad --- /dev/null +++ b/internal/newtelemetry/internal/transport/payload.go @@ -0,0 +1,13 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// Payload is the interface implemented by all telemetry top-level structures, AKA payloads. +// All structs are a strict representation of what is described in Instrumentation Telemetry v2 documentation schemas: +// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas +type Payload interface { + RequestType() RequestType +} diff --git a/internal/newtelemetry/internal/transport/requesttype.go b/internal/newtelemetry/internal/transport/requesttype.go new file mode 100644 index 0000000000..c1f1bd4058 --- /dev/null +++ b/internal/newtelemetry/internal/transport/requesttype.go @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// RequestType determines how the Payload of a request should be handled +type RequestType string + +const ( + // RequestTypeAppStarted is the first message sent by the telemetry + // client, containing the configuration loaded at startup + RequestTypeAppStarted RequestType = "app-started" + + // RequestTypeAppHeartbeat is sent periodically by the client to indicate + // that the app is still running + RequestTypeAppHeartbeat RequestType = "app-heartbeat" + + // RequestTypeGenerateMetrics contains count, gauge, or rate metrics accumulated by the + // client, and is sent periodically along with the heartbeat + RequestTypeGenerateMetrics RequestType = "generate-metrics" + + // RequestTypeDistributions is to send distribution type metrics accumulated by the + // client, and is sent periodically along with the heartbeat + RequestTypeDistributions RequestType = "distributions" + + // RequestTypeAppClosing is sent when the telemetry client is stopped + RequestTypeAppClosing RequestType = "app-closing" + + // RequestTypeAppDependenciesLoaded is sent if DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED + // is enabled. Sent when Start is called for the telemetry client. + RequestTypeAppDependenciesLoaded RequestType = "app-dependencies-loaded" + + // RequestTypeAppClientConfigurationChange is sent if there are changes + // to the client library configuration + RequestTypeAppClientConfigurationChange RequestType = "app-client-configuration-change" + + // RequestTypeAppProductChange is sent when products are enabled/disabled + RequestTypeAppProductChange RequestType = "app-product-change" + + // RequestTypeAppIntegrationsChange is sent when the telemetry client starts + // with info on which integrations are used. 
+	RequestTypeAppIntegrationsChange RequestType = "app-integrations-change"
+
+	// RequestTypeMessageBatch is a wrapper over a list of payloads
+	RequestTypeMessageBatch RequestType = "message-batch"
+
+	// RequestTypeAppExtendedHeartBeat is used as a failsafe in case of a catastrophic data failure.
+	// The data will be used to reconstruct application records in our db.
+	RequestTypeAppExtendedHeartBeat RequestType = "app-extended-heartbeat"
+
+	// RequestTypeLogs is used to send logs to the backend
+	RequestTypeLogs RequestType = "logs"
+)

From f921f60b8544096d9fa7992338590679d7c0b6ba Mon Sep 17 00:00:00 2001
From: Eliott Bouhana
Date: Wed, 15 Jan 2025 15:14:19 +0100
Subject: [PATCH 04/61] internal/newtelemetry/internal: created and implemented
 the Writer

Centralize all HTTP concerns in the writer struct, such as:

* Reusing the same top-level structures when possible
* Encoding the payload properly as JSON
* Separating the low-level HTTP writing from the higher-level things we will
  concern ourselves with in the telemetry client itself
* Centralizing a lot of the static data the telemetry client is supposed to
  send on every request

Signed-off-by: Eliott Bouhana
---
 internal/globalconfig/globalconfig.go         |  16 ++
 .../newtelemetry/internal/tracerconfig.go     |  16 ++
 .../newtelemetry/internal/transport/body.go   |  36 +--
 internal/newtelemetry/internal/writer.go      | 232 ++++++++++++++++++
 4 files changed, 266 insertions(+), 34 deletions(-)
 create mode 100644 internal/newtelemetry/internal/tracerconfig.go
 create mode 100644 internal/newtelemetry/internal/writer.go

diff --git a/internal/globalconfig/globalconfig.go b/internal/globalconfig/globalconfig.go
index a36f50035f..2d6d21e35e 100644
--- a/internal/globalconfig/globalconfig.go
+++ b/internal/globalconfig/globalconfig.go
@@ -9,6 +9,7 @@ package globalconfig
 
 import (
 	"math"
+	"os"
 	"sync"
 
 	"gopkg.in/DataDog/dd-trace-go.v1/internal"
@@ -130,3 +131,18 @@ func HeaderTagsLen() int {
 func ClearHeaderTags() {
 	cfg.headersAsTags.Clear()
 }
+
+// InstrumentationInstallID returns the install ID as described in DD_INSTRUMENTATION_INSTALL_ID
+func InstrumentationInstallID() string {
+	return os.Getenv("DD_INSTRUMENTATION_INSTALL_ID")
+}
+
+// InstrumentationInstallType returns the install type as described in DD_INSTRUMENTATION_INSTALL_TYPE
+func InstrumentationInstallType() string {
+	return os.Getenv("DD_INSTRUMENTATION_INSTALL_TYPE")
+}
+
+// InstrumentationInstallTime returns the install time as described in DD_INSTRUMENTATION_INSTALL_TIME
+func InstrumentationInstallTime() string {
+	return os.Getenv("DD_INSTRUMENTATION_INSTALL_TIME")
+}
diff --git a/internal/newtelemetry/internal/tracerconfig.go b/internal/newtelemetry/internal/tracerconfig.go
new file mode 100644
index 0000000000..439ff430f3
--- /dev/null
+++ b/internal/newtelemetry/internal/tracerconfig.go
@@ -0,0 +1,16 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package internal
+
+// TracerConfig is the configuration for the tracer for the telemetry client.
+type TracerConfig struct {
+	// Service is the name of the service being traced.
+	Service string
+	// Env is the environment the service is running in.
+	Env string
+	// Version is the version of the service.
+ Version string +} diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index b420b1a3bd..1015df61e5 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -5,15 +5,6 @@ package transport -import ( - "runtime" - - "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" - "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" - "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" - tracerversion "gopkg.in/DataDog/dd-trace-go.v1/internal/version" -) - // Application is identifying information about the app itself type Application struct { ServiceName string `json:"service_name"` @@ -37,38 +28,15 @@ type Host struct { } // Body is the common high-level structure encapsulating a telemetry request body +// Described here: https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Request%20Bodies/telemetry_body.md type Body struct { APIVersion string `json:"api_version"` RequestType RequestType `json:"request_type"` TracerTime int64 `json:"tracer_time"` RuntimeID string `json:"runtime_id"` SeqID int64 `json:"seq_id"` - Debug bool `json:"debug"` + Debug bool `json:"debug,omitempty"` Payload Payload `json:"payload"` Application Application `json:"application"` Host Host `json:"host"` } - -func NewBody(service, env, version string) *Body { - return &Body{ - APIVersion: "v2", - RuntimeID: globalconfig.RuntimeID(), - Application: Application{ - ServiceName: service, - Env: env, - ServiceVersion: version, - TracerVersion: tracerversion.Tag, - LanguageName: "go", - LanguageVersion: runtime.Version(), - }, - Host: Host{ - Hostname: hostname.Get(), - OS: osinfo.OSName(), - OSVersion: osinfo.OSVersion(), - Architecture: osinfo.Architecture(), - KernelName: osinfo.KernelName(), - KernelRelease: osinfo.KernelRelease(), - KernelVersion: osinfo.KernelVersion(), - }, - } -} diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go new file mode 100644 index 0000000000..0d9cfe9ff6 --- /dev/null +++ b/internal/newtelemetry/internal/writer.go @@ -0,0 +1,232 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "runtime" + "strconv" + "sync" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + "gopkg.in/DataDog/dd-trace-go.v1/internal/version" +) + +// We copy the transport to avoid using the default one, as it might be +// augmented with tracing and we don't want these calls to be recorded. +// See https://golang.org/pkg/net/http/#DefaultTransport . 
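+//
+// The orchestrion:ignore directive below is assumed to serve the same purpose
+// for compile-time instrumentation: it should keep this client from being
+// instrumented automatically.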
+// +//orchestrion:ignore +var defaultHTTPClient = &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + }, + Timeout: 5 * time.Second, +} + +func newBody(config TracerConfig, debugMode bool) *transport.Body { + return &transport.Body{ + APIVersion: "v2", + RuntimeID: globalconfig.RuntimeID(), + Debug: debugMode, + Application: transport.Application{ + ServiceName: config.Service, + Env: config.Env, + ServiceVersion: config.Version, + TracerVersion: version.Tag, + LanguageName: "go", + LanguageVersion: runtime.Version(), + }, + Host: transport.Host{ + Hostname: hostname.Get(), + OS: osinfo.OSName(), + OSVersion: osinfo.OSVersion(), + Architecture: osinfo.Architecture(), + KernelName: osinfo.KernelName(), + KernelRelease: osinfo.KernelRelease(), + KernelVersion: osinfo.KernelVersion(), + }, + } +} + +// Writer is an interface that allows to send telemetry data to any endpoint that implements the instrumentation telemetry v2 API. +// The telemetry data is sent as a JSON payload as described in the API documentation. +type Writer interface { + // Flush does a synchronous call to the telemetry endpoint with the given payload. Thread-safe. + // It returns the number of bytes sent and an error if any. + Flush(transport.Payload) (int, error) +} + +type writer struct { + mu sync.Mutex + body *transport.Body + httpClient *http.Client + endpoints []*http.Request +} + +type WriterConfig struct { + // TracerConfig is the configuration the tracer sent when the telemetry client was created (required) + TracerConfig + // Endpoints is a list of requests that will be used alongside the body of the telemetry data to create the requests to the telemetry endpoint (required to not be empty) + Endpoints []*http.Request + // HTTPClient is the http client that will be used to send the telemetry data (defaults to the default http client) + HTTPClient *http.Client + // Debug is a flag that indicates whether the telemetry client is in debug mode (defaults to false) + Debug bool +} + +func NewWriter(config WriterConfig) (Writer, error) { + if config.HTTPClient == nil { + config.HTTPClient = defaultHTTPClient + } + + if len(config.Endpoints) == 0 { + return nil, fmt.Errorf("telemetry/writer: no endpoints provided") + } + + body := newBody(config.TracerConfig, config.Debug) + endpoints := make([]*http.Request, len(config.Endpoints)) + for i, endpoint := range config.Endpoints { + endpoints[i] = preBakeRequest(body, endpoint) + } + + return &writer{ + body: body, + httpClient: config.HTTPClient, + endpoints: endpoints, + }, nil +} + +// preBakeRequest adds all the *static* headers that we already know at the time of the creation of the writer. +// This is useful to avoid querying too many things at the time of the request. 
+// Headers necessary are described here: +// https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#required-http-headers +func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request { + clonedEndpoint := endpoint.Clone(context.Background()) + for key, val := range map[string]string{ + "Content-Type": "application/json", + "DD-Telemetry-API-Version": body.APIVersion, + "DD-Telemetry-Debug-Enabled": strconv.FormatBool(body.Debug), + "DD-Client-Library-Language": body.Application.LanguageName, + "DD-Client-Library-Version": body.Application.TracerVersion, + "DD-Agent-Env": body.Application.Env, + "DD-Agent-Hostname": body.Host.Hostname, + "DD-Agent-Install-Id": globalconfig.InstrumentationInstallID(), + "DD-Agent-Install-Type": globalconfig.InstrumentationInstallType(), + "DD-Agent-Install-Time": globalconfig.InstrumentationInstallTime(), + "Datadog-Container-ID": internal.ContainerID(), + "Datadog-Entity-ID": internal.EntityID(), + // TODO: Add support for Cloud provider/resource-type/resource-id headers in another PR and package + // Described here: https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#setting-the-serverless-telemetry-headers + } { + if val == "" { + continue + } + clonedEndpoint.Header.Add(key, val) + } + return clonedEndpoint +} + +// newRequest creates a new http.Request with the given payload and the necessary headers. +func (w *writer) newRequest(endpoint *http.Request, payload transport.Payload) *http.Request { + pipeReader, pipeWriter := io.Pipe() + request := endpoint.Clone(context.Background()) + + w.body.SeqID++ + w.body.TracerTime = time.Now().Unix() + w.body.RequestType = payload.RequestType() + w.body.Payload = payload + + request.Body = pipeReader + request.Header.Set("DD-Telemetry-Request-Type", string(payload.RequestType())) + + go func() { + // No need to wait on this because the http client will close the pipeReader which will close the pipeWriter and finish the goroutine + pipeWriter.CloseWithError(json.NewEncoder(pipeWriter).Encode(w.body)) + }() + + return request +} + +// SumReaderCloser is a ReadCloser that wraps another ReadCloser and counts the number of bytes read. 
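+// Flush wraps every request body with it so that the number of bytes actually
+// read by the HTTP transport can be reported back to the caller.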
+type SumReaderCloser struct { + io.ReadCloser + n *int +} + +func (s *SumReaderCloser) Read(p []byte) (n int, err error) { + n, err = s.ReadCloser.Read(p) + *s.n += n + return +} + +func (w *writer) Flush(payload transport.Payload) (int, error) { + w.mu.Lock() + defer w.mu.Unlock() + + var ( + errs []error + sumRead int + ) + for _, endpoint := range w.endpoints { + sumRead = 0 + request := w.newRequest(endpoint, payload) + request.Body = &SumReaderCloser{ReadCloser: request.Body, n: &sumRead} + response, err := w.httpClient.Do(request) + if err != nil { + errs = append(errs, fmt.Errorf("telemetry/writer: while trying endpoint %q: %w", request.URL.String(), err)) + continue + } + + // Currently we have a maximum of 3 endpoints so we can afford to close bodies at the end of the function + //goland:noinspection GoDeferInLoop + defer response.Body.Close() + + if response.StatusCode >= 300 || response.StatusCode < 200 { + respBodyBytes, _ := io.ReadAll(response.Body) // maybe we can find an error reason in the response body + errs = append(errs, fmt.Errorf("telemetry/writer: while trying endpoint %q: unexpected status code: %q (received body: %q)", request.URL.String(), response.Status, string(respBodyBytes))) + continue + } + + // We succeeded, no need to try the other endpoints + break + } + + return sumRead, errors.Join(errs...) +} + +// RecordWriter is a Writer that stores the payloads in memory. Used for testing purposes +type RecordWriter struct { + mu sync.Mutex + payloads []transport.Payload +} + +func (w *RecordWriter) Flush(payload transport.Payload) error { + w.mu.Lock() + defer w.mu.Unlock() + w.payloads = append(w.payloads, payload) + return nil +} From 160e1fbac52e1eb698ca591e6ae790fc874f5968 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 15 Jan 2025 17:16:35 +0100 Subject: [PATCH 05/61] end to end support for integration, product, client configuration change payloads Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 35 ++++++++++----- internal/newtelemetry/client.go | 58 +++++++++++++----------- internal/newtelemetry/configuration.go | 55 +++++++++++++++++++++++ internal/newtelemetry/defaults.go | 62 ++++++++++++++++---------- internal/newtelemetry/integration.go | 44 ++++++++++++++++++ internal/newtelemetry/product.go | 52 +++++++++++++++++++++ 6 files changed, 246 insertions(+), 60 deletions(-) create mode 100644 internal/newtelemetry/configuration.go create mode 100644 internal/newtelemetry/integration.go create mode 100644 internal/newtelemetry/product.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index ba221232df..34d3aa1a28 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -7,30 +7,42 @@ package newtelemetry import ( "net/http" + "time" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) type ClientConfig struct { - // AgentlessURL is the full URL to the agentless telemetry endpoint. + // AgentlessURL is the full URL to the agentless telemetry endpoint. (Either AgentlessURL or AgentURL must be set or both) // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string - // AgentURL is the url to the agent without the path, + // AgentURL is the url of the agent to send telemetry to. (Either AgentlessURL or AgentURL must be set or both) AgentURL string - // APIKey is the API key to use for sending telemetry, defaults to the env var DD_API_KEY. 
- APIKey string - // HTTPClient is the http client to use for sending telemetry, defaults to a http.DefaultClient copy. HTTPClient http.RoundTripper + + // HeartbeatInterval is the interval at which to send a heartbeat payload, defaults to 60s. + // The maximum value is 60s. + HeartbeatInterval time.Duration + + // FlushIntervalRange is the interval at which the client flushes the data. + // By default, the client will start to flush at 60s intervals and will reduce the interval based on the load till it hit 15s + // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. + FlushIntervalRange struct { + Min time.Duration + Max time.Duration + } } // MetricHandle can be used to submit different values for the same metric. // MetricHandle is used to reduce lock contention when submitting metrics. // This can also be used ephemerally to submit a single metric value like this: // -// telemetry.Metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) +// ```go +// telemetry.Metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) +// ``` type MetricHandle interface { Submit(value float64) @@ -59,11 +71,12 @@ type TelemetryLogger interface { // Integration is an integration that is configured to be traced. type Integration struct { - Name string - Version string - AutoEnabled bool - Compatible bool - Error string + // Name is an arbitrary string that must stay constant for the integration. + Name string + // Version is the version of the integration/dependency that is being loaded. + Version string + // Error is the error that occurred while loading the integration. + Error string } // Client constitutes all the functions available concurrently for the telemetry users. 
All methods are thread-safe diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index d4dd507871..9f2a82de48 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -6,6 +6,8 @@ package newtelemetry import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -15,74 +17,78 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error } type client struct { + tracerConfig internal.TracerConfig + writer internal.Writer + payloadQueue internal.RingQueue[transport.Payload] + + // Data sources + integrations integrations + products products + configuration configuration } -func (c client) MarkIntegrationAsLoaded(integration Integration) { - //TODO implement me - panic("implement me") +func (c *client) MarkIntegrationAsLoaded(integration Integration) { + c.integrations.Add(integration) } -func (c client) Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Count(_ types.Namespace, _ string, _ map[string]string) MetricHandle { //TODO implement me panic("implement me") } -func (c client) Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Rate(_ types.Namespace, _ string, _ map[string]string) MetricHandle { //TODO implement me panic("implement me") } -func (c client) Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Gauge(_ types.Namespace, _ string, _ map[string]string) MetricHandle { //TODO implement me panic("implement me") } -func (c client) Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Distribution(_ types.Namespace, _ string, _ map[string]string) MetricHandle { //TODO implement me panic("implement me") } -func (c client) Logger() TelemetryLogger { +func (c *client) Logger() TelemetryLogger { //TODO implement me panic("implement me") } -func (c client) ProductStarted(product types.Namespace) { - //TODO implement me - panic("implement me") +func (c *client) ProductStarted(product types.Namespace) { + c.products.Add(product, true, nil) } -func (c client) ProductStopped(product types.Namespace) { - //TODO implement me - panic("implement me") +func (c *client) ProductStopped(product types.Namespace) { + c.products.Add(product, false, nil) } -func (c client) ProductStartError(product types.Namespace, err error) { - //TODO implement me - panic("implement me") +func (c *client) ProductStartError(product types.Namespace, err error) { + c.products.Add(product, false, err) } -func (c client) AddAppConfig(key string, value any, origin types.Origin) { - //TODO implement me - panic("implement me") +func (c *client) AddAppConfig(key string, value any, origin types.Origin) { + c.configuration.Add(key, value, origin) } -func (c client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { - //TODO implement me - panic("implement me") +func (c *client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { + for key, value := range kvs { + c.configuration.Add(key, value, origin) + } } -func (c client) flush() { +func (c *client) flush() { //TODO implement me panic("implement me") } -func (c client) appStart() error { +func (c *client) appStart() error { //TODO implement me panic("implement me") } -func (c client) appStop() { +func (c *client) appStop() 
{ //TODO implement me panic("implement me") } diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go new file mode 100644 index 0000000000..348eed8143 --- /dev/null +++ b/internal/newtelemetry/configuration.go @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +type configuration struct { + mu sync.Mutex + config map[string]transport.ConfKeyValue + seqID uint64 +} + +func (c *configuration) Add(key string, value any, origin types.Origin) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.config == nil { + c.config = make(map[string]transport.ConfKeyValue) + } + + c.config[key] = transport.ConfKeyValue{ + Name: key, + Value: value, + Origin: origin, + } +} + +func (c *configuration) Payload() transport.Payload { + c.mu.Lock() + defer c.mu.Unlock() + if len(c.config) == 0 { + return nil + } + + configs := make([]transport.ConfKeyValue, len(c.config)) + idx := 0 + for _, conf := range c.config { + conf.SeqID = c.seqID + configs[idx] = conf + idx++ + c.seqID++ + } + c.config = nil + return transport.AppClientConfigurationChange{ + Configuration: configs, + } +} diff --git a/internal/newtelemetry/defaults.go b/internal/newtelemetry/defaults.go index 58fa3ac11f..e8ede44fc3 100644 --- a/internal/newtelemetry/defaults.go +++ b/internal/newtelemetry/defaults.go @@ -6,36 +6,52 @@ package newtelemetry import ( - "net" - "net/http" "time" ) var ( - // We copy the transport to avoid using the default one, as it might be - // augmented with tracing and we don't want these calls to be recorded. - // See https://golang.org/pkg/net/http/#DefaultTransport . - //orchestrion:ignore - defaultHTTPClient = &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - ForceAttemptHTTP2: true, - MaxIdleConns: 100, - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - }, - Timeout: 5 * time.Second, - } - // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is // also the default URL in case connecting to the agent URL fails. agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. - defaultHeartbeatInterval = 60.0 + defaultHeartbeatInterval = 60.0 * time.Second + + // defaultMinFlushInterval is the default interval at which the client flushes the data. + defaultMinFlushInterval = 15.0 * time.Second + + // defaultMaxFlushInterval is the default interval at which the client flushes the data. + defaultMaxFlushInterval = 60.0 * time.Second ) + +// clamp squeezes a value between a minimum and maximum value. +func clamp[T ~int64](value, minVal, maxVal T) T { + return max(min(maxVal, value), minVal) +} + +// defaultConfig returns a ClientConfig with default values set. 
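+// Caller-provided (non-zero) intervals are clamped to the
+// [time.Microsecond, 60s] range; for example, a HeartbeatInterval of
+// 2*time.Minute is reduced to 60s, while a zero value falls back to the
+// default.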
+func defaultConfig(config ClientConfig) ClientConfig { + if config.AgentlessURL == "" { + config.AgentlessURL = agentlessURL + } + + if config.HeartbeatInterval == 0 { + config.HeartbeatInterval = defaultHeartbeatInterval + } else { + config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) + } + + if config.FlushIntervalRange.Min == 0 { + config.FlushIntervalRange.Min = defaultMinFlushInterval + } else { + config.FlushIntervalRange.Min = clamp(config.FlushIntervalRange.Min, time.Microsecond, 60*time.Second) + } + + if config.FlushIntervalRange.Max == 0 { + config.FlushIntervalRange.Max = defaultMaxFlushInterval + } else { + config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) + } + + return config +} diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go new file mode 100644 index 0000000000..a0ba344abd --- /dev/null +++ b/internal/newtelemetry/integration.go @@ -0,0 +1,44 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type integrations struct { + mu sync.Mutex + integrations []Integration +} + +func (i *integrations) Add(integration Integration) { + i.mu.Lock() + defer i.mu.Unlock() + i.integrations = append(i.integrations, integration) +} + +func (i *integrations) Payload() transport.Payload { + i.mu.Lock() + defer i.mu.Unlock() + if len(i.integrations) == 0 { + return nil + } + + integrations := make([]transport.Integration, len(i.integrations)) + for idx, integration := range i.integrations { + integrations[idx] = transport.Integration{ + Name: integration.Name, + Version: integration.Version, + Enabled: true, + Error: integration.Error, + } + } + return transport.AppIntegrationChange{ + Integrations: integrations, + } +} diff --git a/internal/newtelemetry/product.go b/internal/newtelemetry/product.go new file mode 100644 index 0000000000..9aa215f4d5 --- /dev/null +++ b/internal/newtelemetry/product.go @@ -0,0 +1,52 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package newtelemetry + +import ( + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +type products struct { + mu sync.Mutex + products map[types.Namespace]transport.Product +} + +func (p *products) Add(namespace types.Namespace, enabled bool, err error) { + p.mu.Lock() + defer p.mu.Unlock() + if p.products == nil { + p.products = make(map[types.Namespace]transport.Product) + } + + product := transport.Product{ + Enabled: enabled, + } + + if err != nil { + product.Error = transport.Error{ + Message: err.Error(), + } + } + + p.products[namespace] = product +} + +func (p *products) Payload() transport.Payload { + p.mu.Lock() + defer p.mu.Unlock() + if len(p.products) == 0 { + return nil + } + + res := transport.AppProductChange{ + Products: p.products, + } + p.products = nil + return res +} From f43bbdb31376f3d98d4fde38df972a396060b364 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 15 Jan 2025 17:58:57 +0100 Subject: [PATCH 06/61] refactor config and implement NewClient() Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 29 +---- internal/newtelemetry/client.go | 68 ++++++++++- internal/newtelemetry/config.go | 156 ++++++++++++++++++++++++++ internal/newtelemetry/defaults.go | 57 ---------- internal/newtelemetry/globalclient.go | 1 - 5 files changed, 221 insertions(+), 90 deletions(-) create mode 100644 internal/newtelemetry/config.go delete mode 100644 internal/newtelemetry/defaults.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 34d3aa1a28..ad9026e951 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -6,36 +6,9 @@ package newtelemetry import ( - "net/http" - "time" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) -type ClientConfig struct { - // AgentlessURL is the full URL to the agentless telemetry endpoint. (Either AgentlessURL or AgentURL must be set or both) - // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry - AgentlessURL string - - // AgentURL is the url of the agent to send telemetry to. (Either AgentlessURL or AgentURL must be set or both) - AgentURL string - - // HTTPClient is the http client to use for sending telemetry, defaults to a http.DefaultClient copy. - HTTPClient http.RoundTripper - - // HeartbeatInterval is the interval at which to send a heartbeat payload, defaults to 60s. - // The maximum value is 60s. - HeartbeatInterval time.Duration - - // FlushIntervalRange is the interval at which the client flushes the data. - // By default, the client will start to flush at 60s intervals and will reduce the interval based on the load till it hit 15s - // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. - FlushIntervalRange struct { - Min time.Duration - Max time.Duration - } -} - // MetricHandle can be used to submit different values for the same metric. // MetricHandle is used to reduce lock contention when submitting metrics. // This can also be used ephemerally to submit a single metric value like this: @@ -125,6 +98,6 @@ type Client interface { // appStart sends the telemetry necessary to signal that the app is starting. 
appStart() error - // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() + // appStop sends the telemetry necessary to signal that the app is stopping and calls flush() appStop() } diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 9f2a82de48..a482d8555f 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -6,6 +6,8 @@ package newtelemetry import ( + "errors" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" @@ -13,18 +15,67 @@ import ( // NewClient creates a new telemetry client with the given service, environment, and version and config. func NewClient(service, env, version string, config ClientConfig) (Client, error) { - return nil, nil + if service == "" { + return nil, errors.New("service name must not be empty") + } + + if env == "" { + return nil, errors.New("environment name must not be empty") + } + + if version == "" { + return nil, errors.New("version must not be empty") + } + + config = defaultConfig(config) + if err := config.validateConfig(); err != nil { + return nil, err + } + + tracerConfig := internal.TracerConfig{ + Service: service, + Env: env, + Version: version, + } + + writerConfig, err := config.ToWriterConfig(tracerConfig) + if err != nil { + return nil, err + } + + writer, err := internal.NewWriter(writerConfig) + if err != nil { + return nil, err + } + + client := &client{ + tracerConfig: tracerConfig, + writer: writer, + payloadQueue: internal.NewRingQueue[transport.Payload](), + } + + client.dataSources = append( + client.dataSources, + &client.integrations, + &client.products, + &client.configuration, + ) + return client, nil } type client struct { tracerConfig internal.TracerConfig writer internal.Writer - payloadQueue internal.RingQueue[transport.Payload] + payloadQueue *internal.RingQueue[transport.Payload] // Data sources integrations integrations products products configuration configuration + + dataSources []interface { + Payload() transport.Payload + } } func (c *client) MarkIntegrationAsLoaded(integration Integration) { @@ -78,14 +129,23 @@ func (c *client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { } } +func (c *client) gatherPayloads() []transport.Payload { + var res []transport.Payload + for _, ds := range c.dataSources { + if payload := ds.Payload(); payload != nil { + res = append(res, payload) + } + } + return res +} + func (c *client) flush() { //TODO implement me panic("implement me") } func (c *client) appStart() error { - //TODO implement me - panic("implement me") + return nil } func (c *client) appStop() { diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go new file mode 100644 index 0000000000..67310d06d0 --- /dev/null +++ b/internal/newtelemetry/config.go @@ -0,0 +1,156 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024 Datadog, Inc. + +package newtelemetry + +import ( + "errors" + "fmt" + "net/http" + "net/url" + "os" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" +) + +type ClientConfig struct { + // AgentlessURL is the full URL to the agentless telemetry endpoint. 
(optional) + // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry + AgentlessURL string + + // AgentURL is the url of the agent to send telemetry to. (optional) + // If the AgentURL is not set, the telemetry client will not attempt to connect to the agent before sending to the agentless endpoint. + AgentURL string + + // HTTPClient is the http client to use for sending telemetry, defaults to a http.DefaultClient copy. + HTTPClient *http.Client + + // HeartbeatInterval is the interval at which to send a heartbeat payload, defaults to 60s. + // The maximum value is 60s. + HeartbeatInterval time.Duration + + // FlushIntervalRange is the interval at which the client flushes the data. + // By default, the client will start to flush at 60s intervals and will reduce the interval based on the load till it hit 15s + // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. + FlushIntervalRange struct { + Min time.Duration + Max time.Duration + } + + // Debug enables debug mode for the telemetry clientt and sent it to the backend so it logs the request + Debug bool + + // APIKey is the API key to use for sending telemetry to the agentless endpoint. (using DD_API_KEY env var by default) + APIKey string +} + +const ( + // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is + // also the default URL in case connecting to the agent URL fails. + agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" + + // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. + defaultHeartbeatInterval = 60.0 * time.Second + + // defaultMinFlushInterval is the default interval at which the client flushes the data. + defaultMinFlushInterval = 15.0 * time.Second + + // defaultMaxFlushInterval is the default interval at which the client flushes the data. + defaultMaxFlushInterval = 60.0 * time.Second + + agentProxyAPIPath = "/telemetry/proxy/api/v2/apmtelemetry" +) + +// clamp squeezes a value between a minimum and maximum value. +func clamp[T ~int64](value, minVal, maxVal T) T { + return max(min(maxVal, value), minVal) +} + +func (config ClientConfig) validateConfig() error { + if config.AgentlessURL == "" && config.AgentURL == "" { + return errors.New("either AgentlessURL or AgentURL must be set") + } + + if config.HeartbeatInterval > 60*time.Second { + return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval) + } + + if config.FlushIntervalRange.Min > 60*time.Second || config.FlushIntervalRange.Max > 60*time.Second { + return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + } + + if config.FlushIntervalRange.Min > config.FlushIntervalRange.Max { + return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + } + + return nil +} + +// defaultConfig returns a ClientConfig with default values set. 
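+// Passing a zero-value ClientConfig is valid, for example:
+//
+//	config := defaultConfig(ClientConfig{})
+//
+// fills in the agentless intake URL, reads the API key from DD_API_KEY, and uses a 60s
+// heartbeat with a 15s-60s flush interval range; explicit durations are clamped to at most 60s.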
+func defaultConfig(config ClientConfig) ClientConfig { + if config.AgentlessURL == "" { + config.AgentlessURL = agentlessURL + } + + if config.APIKey == "" { + config.APIKey = os.Getenv("DD_API_KEY") + } + + if config.HeartbeatInterval == 0 { + config.HeartbeatInterval = defaultHeartbeatInterval + } else { + config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) + } + + if config.FlushIntervalRange.Min == 0 { + config.FlushIntervalRange.Min = defaultMinFlushInterval + } else { + config.FlushIntervalRange.Min = clamp(config.FlushIntervalRange.Min, time.Microsecond, 60*time.Second) + } + + if config.FlushIntervalRange.Max == 0 { + config.FlushIntervalRange.Max = defaultMaxFlushInterval + } else { + config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) + } + + return config +} + +func (config ClientConfig) ToWriterConfig(tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { + endpoints := make([]*http.Request, 0, 2) + if config.AgentURL != "" { + baseURL, err := url.Parse(config.AgentURL) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("invalid agent URL: %v", err) + } + + baseURL.Path = agentProxyAPIPath + request, err := http.NewRequest(http.MethodPost, baseURL.String(), nil) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("failed to create request: %v", err) + } + + endpoints = append(endpoints, request) + } + + if config.AgentlessURL != "" && config.APIKey != "" { + request, err := http.NewRequest(http.MethodPost, config.AgentlessURL, nil) + if err != nil { + return internal.WriterConfig{}, fmt.Errorf("failed to create request: %v", err) + } + + request.Header.Set("DD-API-KEY", config.APIKey) + endpoints = append(endpoints, request) + } + + return internal.WriterConfig{ + TracerConfig: tracerConfig, + Endpoints: endpoints, + HTTPClient: config.HTTPClient, + Debug: config.Debug, + }, nil +} diff --git a/internal/newtelemetry/defaults.go b/internal/newtelemetry/defaults.go deleted file mode 100644 index e8ede44fc3..0000000000 --- a/internal/newtelemetry/defaults.go +++ /dev/null @@ -1,57 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. - -package newtelemetry - -import ( - "time" -) - -var ( - // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is - // also the default URL in case connecting to the agent URL fails. - agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" - - // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. - defaultHeartbeatInterval = 60.0 * time.Second - - // defaultMinFlushInterval is the default interval at which the client flushes the data. - defaultMinFlushInterval = 15.0 * time.Second - - // defaultMaxFlushInterval is the default interval at which the client flushes the data. - defaultMaxFlushInterval = 60.0 * time.Second -) - -// clamp squeezes a value between a minimum and maximum value. -func clamp[T ~int64](value, minVal, maxVal T) T { - return max(min(maxVal, value), minVal) -} - -// defaultConfig returns a ClientConfig with default values set. 
-func defaultConfig(config ClientConfig) ClientConfig { - if config.AgentlessURL == "" { - config.AgentlessURL = agentlessURL - } - - if config.HeartbeatInterval == 0 { - config.HeartbeatInterval = defaultHeartbeatInterval - } else { - config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) - } - - if config.FlushIntervalRange.Min == 0 { - config.FlushIntervalRange.Min = defaultMinFlushInterval - } else { - config.FlushIntervalRange.Min = clamp(config.FlushIntervalRange.Min, time.Microsecond, 60*time.Second) - } - - if config.FlushIntervalRange.Max == 0 { - config.FlushIntervalRange.Max = defaultMaxFlushInterval - } else { - config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) - } - - return config -} diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 1811458b5e..789990dbfc 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -33,7 +33,6 @@ func SwapClient(client Client) { func StopApp() { if client := globalClient.Swap(nil); client != nil && *client != nil { (*client).appStop() - (*client).flush() } } From a0f0d6b6b43a8bf414173ae28e48b90220e5f2f6 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 16 Jan 2025 17:28:04 +0100 Subject: [PATCH 07/61] wip Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 13 ++-- internal/newtelemetry/client.go | 69 ++++++++++++++++---- internal/newtelemetry/config.go | 2 +- internal/newtelemetry/configuration.go | 37 ++++++++++- internal/newtelemetry/globalclient.go | 62 +++++++++++++----- internal/newtelemetry/integration.go | 10 +++ internal/newtelemetry/internal/ringbuffer.go | 27 ++++---- internal/newtelemetry/internal/ticker.go | 60 +++++++++++++++++ internal/newtelemetry/internal/writer.go | 15 ++++- internal/newtelemetry/product.go | 13 ++++ 10 files changed, 258 insertions(+), 50 deletions(-) create mode 100644 internal/newtelemetry/internal/ticker.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index ad9026e951..a22bc6731a 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -6,6 +6,8 @@ package newtelemetry import ( + "io" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -56,6 +58,7 @@ type Integration struct { // This is an interface for easier testing but all functions will be mirrored at the package level to call // the global client. type Client interface { + io.Closer // Count creates a new metric handle for the given parameters that can be used to submit values. Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle @@ -92,12 +95,14 @@ type Client interface { // MarkIntegrationAsLoaded marks an integration as loaded in the telemetry MarkIntegrationAsLoaded(integration Integration) - // flush closes the client and flushes any remaining data. - flush() + // Flush closes the client and flushes any remaining data. + // Flush returns the number of bytes sent and an error if any. + // Any error returned means that the data was not sent. + Flush() (int, error) - // appStart sends the telemetry necessary to signal that the app is starting. + // appStart sends the telemetry necessary to signal that the app is starting. 
and calls start() appStart() error - // appStop sends the telemetry necessary to signal that the app is stopping and calls flush() + // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() appStop() } diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index a482d8555f..2189734fdb 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -8,6 +8,7 @@ package newtelemetry import ( "errors" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" @@ -51,22 +52,23 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error client := &client{ tracerConfig: tracerConfig, writer: writer, - payloadQueue: internal.NewRingQueue[transport.Payload](), + clientConfig: config, } - client.dataSources = append( - client.dataSources, + client.ticker = internal.NewTicker(client, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + client.dataSources = append(client.dataSources, &client.integrations, &client.products, &client.configuration, ) + return client, nil } type client struct { tracerConfig internal.TracerConfig writer internal.Writer - payloadQueue *internal.RingQueue[transport.Payload] + clientConfig ClientConfig // Data sources integrations integrations @@ -75,7 +77,10 @@ type client struct { dataSources []interface { Payload() transport.Payload + Size() int } + + ticker *internal.Ticker } func (c *client) MarkIntegrationAsLoaded(integration Integration) { @@ -129,23 +134,50 @@ func (c *client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { } } -func (c *client) gatherPayloads() []transport.Payload { - var res []transport.Payload +func (c *client) Config() ClientConfig { + return c.clientConfig +} + +func (c *client) Flush() (int, error) { + var payloads []transport.Payload for _, ds := range c.dataSources { if payload := ds.Payload(); payload != nil { - res = append(res, payload) + payloads = append(payloads, payload) } } - return res + + return c.flush(payloads) } -func (c *client) flush() { - //TODO implement me - panic("implement me") +// flush sends all the data sources to the writer by let them flow through the given wrapper function. +// The wrapper function is used to transform the payloads before sending them to the writer. 
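+// A nil or empty payload slice is a no-op; otherwise every payload is flushed through the
+// writer individually and the byte counts and errors of all attempts are aggregated into the
+// returned values.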
+func (c *client) flush(payloads []transport.Payload) (int, error) { + if len(payloads) == 0 { + return 0, nil + } + + var ( + nbBytes int + err error + ) + + for _, payload := range payloads { + nbBytesOfPayload, payloadErr := c.writer.Flush(payload) + if nbBytes > 0 { + log.Debug("non-fatal error while flushing telemetry data: %v", err) + err = nil + } + + nbBytes += nbBytesOfPayload + err = errors.Join(err, payloadErr) + } + + return nbBytes, err } func (c *client) appStart() error { - return nil + //TODO implement me + panic("implement me") } func (c *client) appStop() { @@ -153,4 +185,17 @@ func (c *client) appStop() { panic("implement me") } +func (c *client) size() int { + size := 0 + for _, ds := range c.dataSources { + size += ds.Size() + } + return size +} + +func (c *client) Close() error { + c.ticker.Stop() + return nil +} + var _ Client = (*client)(nil) diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go index 67310d06d0..c4501620ef 100644 --- a/internal/newtelemetry/config.go +++ b/internal/newtelemetry/config.go @@ -33,7 +33,7 @@ type ClientConfig struct { HeartbeatInterval time.Duration // FlushIntervalRange is the interval at which the client flushes the data. - // By default, the client will start to flush at 60s intervals and will reduce the interval based on the load till it hit 15s + // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. FlushIntervalRange struct { Min time.Duration diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index 348eed8143..722530c46e 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -6,6 +6,7 @@ package newtelemetry import ( + "reflect" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" @@ -15,6 +16,7 @@ import ( type configuration struct { mu sync.Mutex config map[string]transport.ConfKeyValue + size int seqID uint64 } @@ -26,11 +28,37 @@ func (c *configuration) Add(key string, value any, origin types.Origin) { c.config = make(map[string]transport.ConfKeyValue) } + if kv, ok := c.config[key]; ok { + c.size -= len(kv.Name) + guessSize(kv.Value) + } + c.config[key] = transport.ConfKeyValue{ Name: key, Value: value, Origin: origin, } + + c.size += len(key) + guessSize(value) +} + +// guessSize returns a simple guess of the value's size in bytes. +// 99% of config values are strings so a simple guess is enough. 
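+// For example, guessSize("datadog") reports 7 bytes, guessSize(true) reports 1,
+// and guessSize(int64(1)) reports 8.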
+// All non-primitive types will go through reflection at encoding time anyway +func guessSize(value any) int { + switch value.(type) { + case string: + return len(value.(string)) + case int64, uint64, int, uint, float64, uintptr: + return 8 + case int32, uint32, float32: + return 4 + case int16, uint16: + return 2 + case bool, int8, uint8: + return 1 + } + + return int(reflect.ValueOf(value).Type().Size()) } func (c *configuration) Payload() transport.Payload { @@ -47,9 +75,16 @@ func (c *configuration) Payload() transport.Payload { configs[idx] = conf idx++ c.seqID++ + delete(c.config, conf.Name) } - c.config = nil + c.size = 0 return transport.AppClientConfigurationChange{ Configuration: configs, } } + +func (c *configuration) Size() int { + c.mu.Lock() + defer c.mu.Unlock() + return c.size +} diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 789990dbfc..2b9f27e591 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -11,9 +11,11 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) -var globalClient atomic.Pointer[Client] +var ( + globalClient atomic.Pointer[Client] +) -// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global client. +// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). func StartApp(client Client) error { if err := client.appStart(); err != nil { return err @@ -22,14 +24,14 @@ func StartApp(client Client) error { return nil } -// SwapClient swaps the global client with the given client and flush the old client. +// SwapClient swaps the global client with the given client and Flush the old (*client). func SwapClient(client Client) { if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { - (*oldClient).flush() + (*oldClient).Close() } } -// StopApp creates the app-stopped telemetry, adding to the queue and flush all the queue before stopping the client. +// StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). func StopApp() { if client := globalClient.Swap(nil); client != nil && *client != nil { (*client).appStop() @@ -37,52 +39,82 @@ func StopApp() { } func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return (*globalClient.Load()).Count(namespace, name, tags) + if client := globalClient.Load(); client != nil && *client != nil { + return (*client).Count(namespace, name, tags) + } + + return nil } // Rate creates a new metric handle for the given parameters that can be used to submit values. func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return (*globalClient.Load()).Rate(namespace, name, tags) + if client := globalClient.Load(); client != nil && *client != nil { + return (*client).Rate(namespace, name, tags) + } + + return nil } // Gauge creates a new metric handle for the given parameters that can be used to submit values. func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return (*globalClient.Load()).Gauge(namespace, name, tags) + if client := globalClient.Load(); client != nil && *client != nil { + return (*client).Gauge(namespace, name, tags) + } + + return nil } // Distribution creates a new metric handle for the given parameters that can be used to submit values. 
func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return (*globalClient.Load()).Distribution(namespace, name, tags) + if client := globalClient.Load(); client != nil && *client != nil { + return (*client).Distribution(namespace, name, tags) + } + + return nil } // Logger returns an implementation of the TelemetryLogger interface which sends telemetry logs. func Logger() TelemetryLogger { - return (*globalClient.Load()).Logger() + if client := globalClient.Load(); client != nil && *client != nil { + return (*client).Logger() + } + + return nil } // ProductStarted declares a product to have started at the customer’s request func ProductStarted(product types.Namespace) { - (*globalClient.Load()).ProductStarted(product) + if client := globalClient.Load(); client != nil && *client != nil { + (*client).ProductStarted(product) + } } // ProductStopped declares a product to have being stopped by the customer func ProductStopped(product types.Namespace) { - (*globalClient.Load()).ProductStopped(product) + if client := globalClient.Load(); client != nil && *client != nil { + (*client).ProductStopped(product) + } } // ProductStartError declares that a product could not start because of the following error func ProductStartError(product types.Namespace, err error) { - (*globalClient.Load()).ProductStartError(product, err) + if client := globalClient.Load(); client != nil && *client != nil { + (*client).ProductStartError(product, err) + } } // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. func AddAppConfig(key string, value any, origin types.Origin) { - (*globalClient.Load()).AddAppConfig(key, value, origin) + if client := globalClient.Load(); client != nil && *client != nil { + (*client).AddAppConfig(key, value, origin) + } } // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. 
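+// Illustrative call, assuming the values are JSON-serializable and origin is a valid types.Origin:
+//
+//	telemetry.AddBulkAppConfig(map[string]any{"trace_debug": false, "trace_sample_rate": 0.5}, origin)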
func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { - (*globalClient.Load()).AddBulkAppConfig(kvs, origin) + if client := globalClient.Load(); client != nil && *client != nil { + (*client).AddBulkAppConfig(kvs, origin) + } } diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go index a0ba344abd..7314403ea1 100644 --- a/internal/newtelemetry/integration.go +++ b/internal/newtelemetry/integration.go @@ -14,12 +14,14 @@ import ( type integrations struct { mu sync.Mutex integrations []Integration + size int } func (i *integrations) Add(integration Integration) { i.mu.Lock() defer i.mu.Unlock() i.integrations = append(i.integrations, integration) + i.size += len(integration.Name) + len(integration.Version) + len(integration.Error) } func (i *integrations) Payload() transport.Payload { @@ -38,7 +40,15 @@ func (i *integrations) Payload() transport.Payload { Error: integration.Error, } } + i.integrations = nil + i.size = 0 return transport.AppIntegrationChange{ Integrations: integrations, } } + +func (i *integrations) Size() int { + i.mu.Lock() + defer i.mu.Unlock() + return i.size +} diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index f8af3c48e3..851d0f0ebe 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -9,9 +9,6 @@ import ( "sync" ) -const maxRingBufferSize = 1 << 14 -const startingRingBufferSize = 1 << 8 - type RingQueue[T any] struct { // buffer is the slice that contains the data. buffer []T @@ -22,41 +19,43 @@ type RingQueue[T any] struct { // mu is the lock for the buffer, head and tail. mu sync.Mutex // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. - pool sync.Pool + pool sync.Pool + maxBufferSize int } -func NewRingQueue[T any]() *RingQueue[T] { +func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { return &RingQueue[T]{ - buffer: make([]T, startingRingBufferSize), + buffer: make([]T, minSize), + maxBufferSize: maxSize, pool: sync.Pool{ - New: func() any { return make([]T, startingRingBufferSize) }, + New: func() any { return make([]T, minSize) }, }, } } // Enqueue adds a value to the buffer. -func (rb *RingQueue[T]) Enqueue(val T) { +func (rb *RingQueue[T]) Enqueue(val T) bool { rb.mu.Lock() defer rb.mu.Unlock() rb.buffer[rb.tail] = val rb.tail = (rb.tail + 1) % len(rb.buffer) - if rb.tail == rb.head && len(rb.buffer) == maxRingBufferSize { // We loose one element + if rb.tail == rb.head && len(rb.buffer) == rb.maxBufferSize { // We loose one element rb.head = (rb.head + 1) % len(rb.buffer) - // TODO: maybe log that we lost an element - return + return false } - // We need to resize the buffer, we double the size, this should happen 10 times before we reach the max size + // We need to resize the buffer, we double the size and cap it to maxBufferSize if rb.tail == rb.head { - newBuffer := make([]T, cap(rb.buffer)*2) + newBuffer := make([]T, min(cap(rb.buffer)*2, rb.maxBufferSize)) copy(newBuffer, rb.buffer[rb.head:]) copy(newBuffer[len(rb.buffer)-rb.head:], rb.buffer[:rb.tail]) rb.head = 0 rb.tail = len(rb.buffer) - 1 rb.buffer = newBuffer } + return true } // Dequeue removes a value from the buffer. 
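+// Enqueue returns false when the buffer already holds maxBufferSize elements and the oldest
+// element had to be overwritten; otherwise the buffer grows as needed (doubling, capped at
+// maxBufferSize) and Enqueue returns true.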
@@ -99,5 +98,5 @@ func (rb *RingQueue[T]) IsFull() bool { rb.mu.Lock() defer rb.mu.Unlock() - return (rb.tail+1)%len(rb.buffer) == rb.head && len(rb.buffer) == maxRingBufferSize + return (rb.tail+1)%len(rb.buffer) == rb.head && len(rb.buffer) == rb.maxBufferSize } diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go new file mode 100644 index 0000000000..df3aaf4e49 --- /dev/null +++ b/internal/newtelemetry/internal/ticker.go @@ -0,0 +1,60 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "sync" + "time" +) + +type flusher interface { + Flush() (int, error) +} + +type Ticker struct { + *time.Ticker + + tickSpeedMu sync.Mutex + tickSpeed time.Duration + + maxInterval time.Duration + minInterval time.Duration + + flusher flusher +} + +func NewTicker(flusher flusher, minInterval, maxInterval time.Duration) *Ticker { + ticker := &Ticker{ + Ticker: time.NewTicker(maxInterval), + tickSpeed: maxInterval, + + maxInterval: maxInterval, + minInterval: minInterval, + + flusher: flusher, + } + + go func() { + for range ticker.C { + _, err := flusher.Flush() + if err != nil { + // Reset the interval to the maximum value + ticker.AdjustTickSpeed(maxInterval) + return + } + } + }() + + return ticker +} + +func (t *Ticker) AdjustTickSpeed(interval time.Duration) { + t.tickSpeedMu.Lock() + defer t.tickSpeedMu.Unlock() + + t.tickSpeed = interval + t.Reset(interval) +} diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 0d9cfe9ff6..8aca6256eb 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -14,7 +14,6 @@ import ( "net" "net/http" "runtime" - "strconv" "sync" "time" @@ -77,6 +76,8 @@ func newBody(config TracerConfig, debugMode bool) *transport.Body { type Writer interface { // Flush does a synchronous call to the telemetry endpoint with the given payload. Thread-safe. // It returns the number of bytes sent and an error if any. + // Keep in mind that errors can be returned even if the payload was sent successfully. + // Please check if the number of bytes sent is greater than 0 to know if the payload was sent. 
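+	// A typical caller pattern, assuming w is a Writer and payload a transport.Payload:
+	//
+	//	n, err := w.Flush(payload)
+	//	if n == 0 {
+	//		// nothing was accepted by any endpoint; err explains why
+	//	}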
Flush(transport.Payload) (int, error) } @@ -91,8 +92,9 @@ type WriterConfig struct { // TracerConfig is the configuration the tracer sent when the telemetry client was created (required) TracerConfig // Endpoints is a list of requests that will be used alongside the body of the telemetry data to create the requests to the telemetry endpoint (required to not be empty) + // The writer will try each endpoint in order until it gets a 2XX HTTP response from the server Endpoints []*http.Request - // HTTPClient is the http client that will be used to send the telemetry data (defaults to the default http client) + // HTTPClient is the http client that will be used to send the telemetry data (defaults to a copy of [http.DefaultClient]) HTTPClient *http.Client // Debug is a flag that indicates whether the telemetry client is in debug mode (defaults to false) Debug bool @@ -103,6 +105,9 @@ func NewWriter(config WriterConfig) (Writer, error) { config.HTTPClient = defaultHTTPClient } + // Don't allow the client to have a timeout higher than 5 seconds + config.HTTPClient.Timeout = min(config.HTTPClient.Timeout, 5*time.Second) + if len(config.Endpoints) == 0 { return nil, fmt.Errorf("telemetry/writer: no endpoints provided") } @@ -129,7 +134,6 @@ func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request for key, val := range map[string]string{ "Content-Type": "application/json", "DD-Telemetry-API-Version": body.APIVersion, - "DD-Telemetry-Debug-Enabled": strconv.FormatBool(body.Debug), "DD-Client-Library-Language": body.Application.LanguageName, "DD-Client-Library-Version": body.Application.TracerVersion, "DD-Agent-Env": body.Application.Env, @@ -147,6 +151,11 @@ func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request } clonedEndpoint.Header.Add(key, val) } + + if body.Debug { + clonedEndpoint.Header.Add("DD-Telemetry-Debug-Enabled", "true") + } + return clonedEndpoint } diff --git a/internal/newtelemetry/product.go b/internal/newtelemetry/product.go index 9aa215f4d5..b0d7b7eeb5 100644 --- a/internal/newtelemetry/product.go +++ b/internal/newtelemetry/product.go @@ -15,6 +15,7 @@ import ( type products struct { mu sync.Mutex products map[types.Namespace]transport.Product + size int } func (p *products) Add(namespace types.Namespace, enabled bool, err error) { @@ -34,7 +35,12 @@ func (p *products) Add(namespace types.Namespace, enabled bool, err error) { } } + if product, ok := p.products[namespace]; ok { + p.size -= len(namespace) + len(product.Error.Message) + } + p.products[namespace] = product + p.size += len(namespace) + len(product.Error.Message) } func (p *products) Payload() transport.Payload { @@ -48,5 +54,12 @@ func (p *products) Payload() transport.Payload { Products: p.products, } p.products = nil + p.size = 0 return res } + +func (p *products) Size() int { + p.mu.Lock() + defer p.mu.Unlock() + return p.size +} From b8de6af0299d9780959821d6a1a8f7003a18e08d Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 17 Jan 2025 14:39:59 +0100 Subject: [PATCH 08/61] support for payload transformers Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 4 +- internal/newtelemetry/client.go | 34 +++++++-- internal/newtelemetry/globalclient.go | 4 +- internal/newtelemetry/internal/ticker.go | 1 - .../newtelemetry/internal/transformers.go | 74 +++++++++++++++++++ internal/newtelemetry/internal/writer.go | 7 +- 6 files changed, 109 insertions(+), 15 deletions(-) create mode 100644 internal/newtelemetry/internal/transformers.go diff --git 
a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index a22bc6731a..06afaf7f68 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -100,8 +100,8 @@ type Client interface { // Any error returned means that the data was not sent. Flush() (int, error) - // appStart sends the telemetry necessary to signal that the app is starting. and calls start() - appStart() error + // appStart sends the telemetry necessary to signal that the app is starting. + appStart() // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() appStop() diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 2189734fdb..b67d5072c9 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -7,6 +7,7 @@ package newtelemetry import ( "errors" + "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" @@ -50,9 +51,10 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error } client := &client{ - tracerConfig: tracerConfig, - writer: writer, - clientConfig: config, + tracerConfig: tracerConfig, + writer: writer, + clientConfig: config, + flushTransformer: internal.MessageBatchTransformer, } client.ticker = internal.NewTicker(client, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) @@ -81,6 +83,10 @@ type client struct { } ticker *internal.Ticker + + // flushTransformer is the transformer to use for the next flush + flushTransformer internal.Transformer + flushTransformerMu sync.Mutex } func (c *client) MarkIntegrationAsLoaded(integration Integration) { @@ -156,6 +162,16 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return 0, nil } + // Always add the Heartbeat to the payloads + payloads = append(payloads, transport.AppHeartbeat{}) + + // Transform the payloads + { + c.flushTransformerMu.Lock() + payloads, c.flushTransformer = c.flushTransformer.Transform(payloads) + c.flushTransformerMu.Unlock() + } + var ( nbBytes int err error @@ -175,14 +191,16 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return nbBytes, err } -func (c *client) appStart() error { - //TODO implement me - panic("implement me") +func (c *client) appStart() { + c.flushTransformerMu.Lock() + defer c.flushTransformerMu.Unlock() + c.flushTransformer = internal.AppStartedTransformer } func (c *client) appStop() { - //TODO implement me - panic("implement me") + c.flushTransformerMu.Lock() + defer c.flushTransformerMu.Unlock() + c.flushTransformer = internal.AppClosingTransformer } func (c *client) size() int { diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 2b9f27e591..6bd1ee5037 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -17,9 +17,7 @@ var ( // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). 
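+// A minimal startup sketch, assuming a trace agent is reachable at localhost:8126:
+//
+//	client, err := telemetry.NewClient("my-service", "prod", "1.2.3", telemetry.ClientConfig{AgentURL: "http://localhost:8126"})
+//	if err == nil {
+//		telemetry.StartApp(client)
+//	}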
func StartApp(client Client) error { - if err := client.appStart(); err != nil { - return err - } + client.appStart() SwapClient(client) return nil } diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index df3aaf4e49..67fe9bd4da 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -43,7 +43,6 @@ func NewTicker(flusher flusher, minInterval, maxInterval time.Duration) *Ticker if err != nil { // Reset the interval to the maximum value ticker.AdjustTickSpeed(maxInterval) - return } } }() diff --git a/internal/newtelemetry/internal/transformers.go b/internal/newtelemetry/internal/transformers.go new file mode 100644 index 0000000000..84c91d0af0 --- /dev/null +++ b/internal/newtelemetry/internal/transformers.go @@ -0,0 +1,74 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// Transformer is an interface for transforming payloads to comply with different types of lifecycle events in the application. +type Transformer interface { + // Transform transforms the given payloads and returns the transformed payloads and the Transformer to use for the next + // transformation. + Transform([]transport.Payload) ([]transport.Payload, Transformer) +} + +type appStartedTransformer struct{} + +var AppStartedTransformer Transformer = &appStartedTransformer{} + +func (t *appStartedTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { + appStarted := transport.AppStarted{ + InstallSignature: transport.InstallSignature{ + InstallID: globalconfig.InstrumentationInstallID(), + InstallType: globalconfig.InstrumentationInstallType(), + InstallTime: globalconfig.InstrumentationInstallTime(), + }, + } + + payloadLefts := make([]transport.Message, 0, len(payloads)/2) + for _, payload := range payloads { + switch payload.(type) { + case transport.AppClientConfigurationChange: + appStarted.Configuration = payload.(transport.AppClientConfigurationChange).Configuration + case transport.AppProductChange: + appStarted.Products = payload.(transport.AppProductChange).Products + default: + payloadLefts = append(payloadLefts, transport.Message{Payload: payload, RequestType: payload.RequestType()}) + } + } + + // Following the documentation, an app-started payload cannot be put in a message-batch one. 
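+	// The result is therefore two payloads: the app-started payload on its own, followed by a
+	// message-batch carrying every remaining payload, and subsequent flushes fall back to the
+	// MessageBatchTransformer.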
+ return []transport.Payload{ + appStarted, + transport.MessageBatch{Payload: payloadLefts}, + }, MessageBatchTransformer +} + +type messageBatchTransformer struct{} + +var MessageBatchTransformer Transformer = &messageBatchTransformer{} + +func (t *messageBatchTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { + messages := make([]transport.Message, len(payloads)) + for _, payload := range payloads { + messages = append(messages, transport.Message{ + Payload: payload, + RequestType: payload.RequestType(), + }) + } + + return []transport.Payload{transport.MessageBatch{Payload: messages}}, MessageBatchTransformer +} + +type appClosingTransformer struct{} + +var AppClosingTransformer Transformer = &appClosingTransformer{} + +func (t *appClosingTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { + return MessageBatchTransformer.Transform(append(payloads, transport.AppClosing{})) +} diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 8aca6256eb..3be939cd71 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -47,6 +47,11 @@ var defaultHTTPClient = &http.Client{ } func newBody(config TracerConfig, debugMode bool) *transport.Body { + hostname := hostname.Get() + if hostname == "" { + hostname = "unknown" + } + return &transport.Body{ APIVersion: "v2", RuntimeID: globalconfig.RuntimeID(), @@ -60,7 +65,7 @@ func newBody(config TracerConfig, debugMode bool) *transport.Body { LanguageVersion: runtime.Version(), }, Host: transport.Host{ - Hostname: hostname.Get(), + Hostname: hostname, OS: osinfo.OSName(), OSVersion: osinfo.OSVersion(), Architecture: osinfo.Architecture(), From a9fc7bded3d99fd6aa004cb10f4f6b7161d817dd Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 17 Jan 2025 15:16:52 +0100 Subject: [PATCH 09/61] support for more configuration options Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 6 ++- internal/newtelemetry/config.go | 52 ++++++++++++++++++++---- internal/newtelemetry/internal/writer.go | 7 +++- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index b67d5072c9..dcdc422038 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -40,7 +40,7 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error Version: version, } - writerConfig, err := config.ToWriterConfig(tracerConfig) + writerConfig, err := NewWriterConfig(config, tracerConfig) if err != nil { return nil, err } @@ -199,8 +199,10 @@ func (c *client) appStart() { func (c *client) appStop() { c.flushTransformerMu.Lock() - defer c.flushTransformerMu.Unlock() c.flushTransformer = internal.AppClosingTransformer + c.flushTransformerMu.Unlock() + c.Flush() + c.Close() } func (c *client) size() int { diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go index c4501620ef..71d8b5a66b 100644 --- a/internal/newtelemetry/config.go +++ b/internal/newtelemetry/config.go @@ -6,17 +6,32 @@ package newtelemetry import ( - "errors" "fmt" "net/http" "net/url" "os" "time" + globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" ) type ClientConfig struct { + // DependencyCollectionEnabled determines whether dependency data is sent via telemetry. 
+ // If false, libraries should not send the app-dependencies-loaded event. + // We default this to true since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product + // This can be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED + DependencyCollectionEnabled bool + + // MetricsEnabled etermines whether metrics are sent via telemetry. + // If false, libraries should not send the generate-metrics or distributions events. + // This can be controlled via the env var DD_TELEMETRY_METRICS_ENABLED + MetricsEnabled bool + + // LogsEnabled determines whether logs are sent via telemetry. + // This can be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED + LogsEnabled bool + // AgentlessURL is the full URL to the agentless telemetry endpoint. (optional) // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string @@ -45,6 +60,11 @@ type ClientConfig struct { // APIKey is the API key to use for sending telemetry to the agentless endpoint. (using DD_API_KEY env var by default) APIKey string + + // EarlyFlushPayloadSize is the size of the payload that will trigger an early flush. + // This is necessary because backend won't allow payloads larger than 5MB. + // The default value here will be 2MB to take into account the large inaccuracy in estimating the size of payloads + EarlyFlushPayloadSize int } const ( @@ -53,7 +73,7 @@ const ( agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. - defaultHeartbeatInterval = 60.0 * time.Second + defaultHeartbeatInterval = 60 // seconds // defaultMinFlushInterval is the default interval at which the client flushes the data. defaultMinFlushInterval = 15.0 * time.Second @@ -62,6 +82,10 @@ const ( defaultMaxFlushInterval = 60.0 * time.Second agentProxyAPIPath = "/telemetry/proxy/api/v2/apmtelemetry" + + defaultEarlyFlushPayloadSize = 2 * 1024 * 1024 // 2MB + + maxPayloadSize = 5 * 1024 * 1024 // 5MB ) // clamp squeezes a value between a minimum and maximum value. 
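+// For example, clamp(90*time.Second, time.Microsecond, 60*time.Second) returns 60s, and
+// clamp(time.Duration(0), time.Microsecond, 60*time.Second) returns time.Microsecond.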
@@ -70,10 +94,6 @@ func clamp[T ~int64](value, minVal, maxVal T) T { } func (config ClientConfig) validateConfig() error { - if config.AgentlessURL == "" && config.AgentURL == "" { - return errors.New("either AgentlessURL or AgentURL must be set") - } - if config.HeartbeatInterval > 60*time.Second { return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval) } @@ -86,6 +106,10 @@ func (config ClientConfig) validateConfig() error { return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) } + if config.EarlyFlushPayloadSize > maxPayloadSize || config.EarlyFlushPayloadSize <= 0 { + return fmt.Errorf("EarlyFlushPayloadSize must be between 0 and 5MB, got %v", config.EarlyFlushPayloadSize) + } + return nil } @@ -100,7 +124,7 @@ func defaultConfig(config ClientConfig) ClientConfig { } if config.HeartbeatInterval == 0 { - config.HeartbeatInterval = defaultHeartbeatInterval + config.HeartbeatInterval = time.Duration(globalinternal.IntEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", defaultHeartbeatInterval)) * time.Second } else { config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) } @@ -117,10 +141,22 @@ func defaultConfig(config ClientConfig) ClientConfig { config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) } + if !config.DependencyCollectionEnabled { + config.DependencyCollectionEnabled = globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) + } + + if !config.MetricsEnabled { + config.MetricsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_METRICS_ENABLED", true) + } + + if !config.LogsEnabled { + config.LogsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true) + } + return config } -func (config ClientConfig) ToWriterConfig(tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { +func NewWriterConfig(config ClientConfig, tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { endpoints := make([]*http.Request, 0, 2) if config.AgentURL != "" { baseURL, err := url.Parse(config.AgentURL) diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 3be939cd71..2a8cce2384 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -111,7 +111,12 @@ func NewWriter(config WriterConfig) (Writer, error) { } // Don't allow the client to have a timeout higher than 5 seconds - config.HTTPClient.Timeout = min(config.HTTPClient.Timeout, 5*time.Second) + // This is to avoid blocking the client for too long in case of network issues + if config.HTTPClient.Timeout > 5*time.Second { + copyClient := *config.HTTPClient + config.HTTPClient = ©Client + config.HTTPClient.Timeout = 5 * time.Second + } if len(config.Endpoints) == 0 { return nil, fmt.Errorf("telemetry/writer: no endpoints provided") From 5159533d047299138f12bf659f29e0c6f785bef5 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 17 Jan 2025 15:54:04 +0100 Subject: [PATCH 10/61] fix hostname resolution logic to match the previous telemetry client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/internal/writer.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 2a8cce2384..51dd9badda 100644 --- a/internal/newtelemetry/internal/writer.go +++ 
b/internal/newtelemetry/internal/writer.go @@ -13,6 +13,7 @@ import ( "io" "net" "net/http" + "os" "runtime" "sync" "time" @@ -47,9 +48,13 @@ var defaultHTTPClient = &http.Client{ } func newBody(config TracerConfig, debugMode bool) *transport.Body { - hostname := hostname.Get() - if hostname == "" { - hostname = "unknown" + osHostname, err := os.Hostname() + if err != nil { + osHostname = hostname.Get() + } + + if osHostname == "" { + osHostname = "unknown" // hostname field is not allowed to be empty } return &transport.Body{ @@ -65,7 +70,7 @@ func newBody(config TracerConfig, debugMode bool) *transport.Body { LanguageVersion: runtime.Version(), }, Host: transport.Host{ - Hostname: hostname, + Hostname: osHostname, OS: osinfo.OSName(), OSVersion: osinfo.OSVersion(), Architecture: osinfo.Architecture(), From 5e90cb813b7740e7dabed75eb784715520ee83b6 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 20 Jan 2025 14:11:27 +0100 Subject: [PATCH 11/61] test the writer Signed-off-by: Eliott Bouhana --- internal/newtelemetry/internal/writer.go | 11 +- internal/newtelemetry/internal/writer_test.go | 200 ++++++++++++++++++ 2 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 internal/newtelemetry/internal/writer_test.go diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 51dd9badda..948348fcd1 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -146,6 +146,10 @@ func NewWriter(config WriterConfig) (Writer, error) { // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#required-http-headers func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request { clonedEndpoint := endpoint.Clone(context.Background()) + if clonedEndpoint.Header == nil { + clonedEndpoint.Header = make(http.Header, 11) + } + for key, val := range map[string]string{ "Content-Type": "application/json", "DD-Telemetry-API-Version": body.APIVersion, @@ -216,12 +220,12 @@ func (w *writer) Flush(payload transport.Payload) (int, error) { sumRead int ) for _, endpoint := range w.endpoints { - sumRead = 0 request := w.newRequest(endpoint, payload) request.Body = &SumReaderCloser{ReadCloser: request.Body, n: &sumRead} response, err := w.httpClient.Do(request) if err != nil { - errs = append(errs, fmt.Errorf("telemetry/writer: while trying endpoint %q: %w", request.URL.String(), err)) + errs = append(errs, fmt.Errorf("telemetry/writer: %w", err)) + sumRead = 0 continue } @@ -231,7 +235,8 @@ func (w *writer) Flush(payload transport.Payload) (int, error) { if response.StatusCode >= 300 || response.StatusCode < 200 { respBodyBytes, _ := io.ReadAll(response.Body) // maybe we can find an error reason in the response body - errs = append(errs, fmt.Errorf("telemetry/writer: while trying endpoint %q: unexpected status code: %q (received body: %q)", request.URL.String(), response.Status, string(respBodyBytes))) + errs = append(errs, fmt.Errorf("telemetry/writer: unexpected status code: %q (received body: %q)", response.Status, string(respBodyBytes))) + sumRead = 0 continue } diff --git a/internal/newtelemetry/internal/writer_test.go b/internal/newtelemetry/internal/writer_test.go new file mode 100644 index 0000000000..df4d255cc1 --- /dev/null +++ b/internal/newtelemetry/internal/writer_test.go @@ -0,0 +1,200 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the 
Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func TestNewWriter_ValidConfig(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + Endpoints: []*http.Request{ + {Method: http.MethodPost, URL: &url.URL{Scheme: "http", Host: "localhost", Path: "/telemetry"}}, + }, + } + + writer, err := NewWriter(config) + assert.NoError(t, err) + assert.NotNil(t, writer) +} + +func TestNewWriter_NoEndpoints(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + Endpoints: []*http.Request{}, + } + + writer, err := NewWriter(config) + assert.Error(t, err) + assert.Nil(t, writer) +} + +type testPayload struct { + RequestTypeValue transport.RequestType `json:"request_type"` + marshalJSON func() ([]byte, error) +} + +func (p *testPayload) MarshalJSON() ([]byte, error) { + return p.marshalJSON() +} + +func (p *testPayload) RequestType() transport.RequestType { + return p.RequestTypeValue +} + +func TestWriter_Flush_Success(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + marshalJSONCalled bool + payloadReceived bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled = true + return []byte(`{"request_type":"test"}`), nil + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived = true + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + req, err := http.NewRequest(http.MethodPost, server.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req) + writer, _ := NewWriter(config) + + bytesSent, err := writer.Flush(&payload) + require.NoError(t, err) + + assert.NotZero(t, bytesSent) + assert.True(t, marshalJSONCalled) + assert.True(t, payloadReceived) +} + +func TestWriter_Flush_Failure(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + marshalJSONCalled bool + payloadReceived bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled = true + return []byte(`{"request_type":"test"}`), nil + }, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived = true + w.WriteHeader(http.StatusBadRequest) + })) + defer server.Close() + + req, err := http.NewRequest(http.MethodPost, server.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req) + writer, _ := NewWriter(config) + + bytesSent, err := writer.Flush(&payload) + require.Error(t, err) + assert.Zero(t, bytesSent) + assert.True(t, marshalJSONCalled) + assert.True(t, payloadReceived) +} + +func TestWriter_Flush_MultipleEndpoints(t *testing.T) { + config := WriterConfig{ + TracerConfig: TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, + } + + var ( + 
marshalJSONCalled int + payloadReceived1 bool + payloadReceived2 bool + ) + + payload := testPayload{ + RequestTypeValue: "test", + marshalJSON: func() ([]byte, error) { + marshalJSONCalled++ + return []byte(`{"request_type":"test"}`), nil + }, + } + + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payloadReceived1 = true + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + assert.True(t, payloadReceived1) + payloadReceived2 = true + w.WriteHeader(http.StatusOK) + })) + defer server2.Close() + + req1, err := http.NewRequest(http.MethodPost, server1.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req1) + req2, err := http.NewRequest(http.MethodPost, server2.URL, nil) + require.NoError(t, err) + + config.Endpoints = append(config.Endpoints, req2) + writer, _ := NewWriter(config) + + bytesSent, err := writer.Flush(&payload) + require.ErrorContains(t, err, `telemetry/writer: unexpected status code: "500 Internal Server Error" (received body: "")`) + + assert.NotZero(t, bytesSent) + assert.Equal(t, 2, marshalJSONCalled) + assert.True(t, payloadReceived2) +} From 9d9c3b8bc38969f20cf686a8ab835775f7b806f4 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 20 Jan 2025 14:17:36 +0100 Subject: [PATCH 12/61] support for disabling telemetry Signed-off-by: Eliott Bouhana --- internal/newtelemetry/globalclient.go | 60 +++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 6bd1ee5037..7ce231fc2f 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -8,6 +8,7 @@ package newtelemetry import ( "sync/atomic" + globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -17,6 +18,10 @@ var ( // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). func StartApp(client Client) error { + if Disabled() { + return nil + } + client.appStart() SwapClient(client) return nil @@ -24,6 +29,10 @@ func StartApp(client Client) error { // SwapClient swaps the global client with the given client and Flush the old (*client). func SwapClient(client Client) { + if Disabled() { + return + } + if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { (*oldClient).Close() } @@ -31,12 +40,27 @@ func SwapClient(client Client) { // StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). func StopApp() { + if Disabled() { + return + } + if client := globalClient.Swap(nil); client != nil && *client != nil { (*client).appStop() } } +// Disabled returns whether instrumentation telemetry is disabled +// according to the DD_INSTRUMENTATION_TELEMETRY_ENABLED env var +func Disabled() bool { + return !globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) +} + +// Count creates a new metric handle for the given parameters that can be used to submit values. 
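+// Count returns nil when telemetry is disabled via DD_INSTRUMENTATION_TELEMETRY_ENABLED or
+// when no global client has been started, so callers should nil-check the returned handle.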
func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + if Disabled() { + return nil + } + if client := globalClient.Load(); client != nil && *client != nil { return (*client).Count(namespace, name, tags) } @@ -46,6 +70,10 @@ func Count(namespace types.Namespace, name string, tags map[string]string) Metri // Rate creates a new metric handle for the given parameters that can be used to submit values. func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + if Disabled() { + return nil + } + if client := globalClient.Load(); client != nil && *client != nil { return (*client).Rate(namespace, name, tags) } @@ -55,6 +83,10 @@ func Rate(namespace types.Namespace, name string, tags map[string]string) Metric // Gauge creates a new metric handle for the given parameters that can be used to submit values. func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + if Disabled() { + return nil + } + if client := globalClient.Load(); client != nil && *client != nil { return (*client).Gauge(namespace, name, tags) } @@ -64,6 +96,10 @@ func Gauge(namespace types.Namespace, name string, tags map[string]string) Metri // Distribution creates a new metric handle for the given parameters that can be used to submit values. func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + if Disabled() { + return nil + } + if client := globalClient.Load(); client != nil && *client != nil { return (*client).Distribution(namespace, name, tags) } @@ -73,6 +109,10 @@ func Distribution(namespace types.Namespace, name string, tags map[string]string // Logger returns an implementation of the TelemetryLogger interface which sends telemetry logs. func Logger() TelemetryLogger { + if Disabled() { + return nil + } + if client := globalClient.Load(); client != nil && *client != nil { return (*client).Logger() } @@ -82,6 +122,10 @@ func Logger() TelemetryLogger { // ProductStarted declares a product to have started at the customer’s request func ProductStarted(product types.Namespace) { + if Disabled() { + return + } + if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStarted(product) } @@ -89,6 +133,10 @@ func ProductStarted(product types.Namespace) { // ProductStopped declares a product to have being stopped by the customer func ProductStopped(product types.Namespace) { + if Disabled() { + return + } + if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStopped(product) } @@ -96,6 +144,10 @@ func ProductStopped(product types.Namespace) { // ProductStartError declares that a product could not start because of the following error func ProductStartError(product types.Namespace, err error) { + if Disabled() { + return + } + if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStartError(product, err) } @@ -104,6 +156,10 @@ func ProductStartError(product types.Namespace, err error) { // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. 
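//
// A hedged usage sketch (the keys and values are illustrative; types.OriginDefault
// is the only origin constant referenced elsewhere in this series):
//
//	AddAppConfig("trace_sample_rate", 0.5, types.OriginDefault)
//	AddBulkAppConfig(map[string]any{"service": "billing", "env": "prod"}, types.OriginDefault)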
func AddAppConfig(key string, value any, origin types.Origin) { + if Disabled() { + return + } + if client := globalClient.Load(); client != nil && *client != nil { (*client).AddAppConfig(key, value, origin) } @@ -112,6 +168,10 @@ func AddAppConfig(key string, value any, origin types.Origin) { // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { + if Disabled() { + return + } + if client := globalClient.Load(); client != nil && *client != nil { (*client).AddBulkAppConfig(kvs, origin) } From 72359f59562d9ec6a301816b62e86ae762b79bbe Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 20 Jan 2025 15:58:45 +0100 Subject: [PATCH 13/61] renamed transformers to mappers and added a heartbeat mapper, removed the Size() method on data sources. Put back support for the payloadQueue in case Flush calls are failing Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 4 +- internal/newtelemetry/client.go | 117 +++++++++++------- internal/newtelemetry/config.go | 5 + internal/newtelemetry/configuration.go | 35 ------ internal/newtelemetry/integration.go | 9 -- .../internal/mapper/app_closing.go | 22 ++++ .../internal/mapper/app_started.go | 46 +++++++ .../newtelemetry/internal/mapper/default.go | 81 ++++++++++++ .../newtelemetry/internal/mapper/mapper.go | 17 +++ .../newtelemetry/internal/mapper/wrapper.go | 18 +++ internal/newtelemetry/internal/ringbuffer.go | 13 +- internal/newtelemetry/internal/ticker.go | 30 ++--- .../newtelemetry/internal/transformers.go | 74 ----------- internal/newtelemetry/product.go | 13 -- 14 files changed, 286 insertions(+), 198 deletions(-) create mode 100644 internal/newtelemetry/internal/mapper/app_closing.go create mode 100644 internal/newtelemetry/internal/mapper/app_started.go create mode 100644 internal/newtelemetry/internal/mapper/default.go create mode 100644 internal/newtelemetry/internal/mapper/mapper.go create mode 100644 internal/newtelemetry/internal/mapper/wrapper.go delete mode 100644 internal/newtelemetry/internal/transformers.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 06afaf7f68..8d0895eb7f 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -96,9 +96,7 @@ type Client interface { MarkIntegrationAsLoaded(integration Integration) // Flush closes the client and flushes any remaining data. - // Flush returns the number of bytes sent and an error if any. - // Any error returned means that the data was not sent. - Flush() (int, error) + Flush() // appStart sends the telemetry necessary to signal that the app is starting. 
appStart() diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index dcdc422038..d191afdef4 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -11,6 +11,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/mapper" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -51,19 +52,25 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error } client := &client{ - tracerConfig: tracerConfig, - writer: writer, - clientConfig: config, - flushTransformer: internal.MessageBatchTransformer, + tracerConfig: tracerConfig, + writer: writer, + clientConfig: config, + flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval), + // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. + // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), } - client.ticker = internal.NewTicker(client, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) client.dataSources = append(client.dataSources, &client.integrations, &client.products, &client.configuration, ) + client.flushTicker = internal.NewTicker(func() { + client.Flush() + }, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + return client, nil } @@ -76,17 +83,19 @@ type client struct { integrations integrations products products configuration configuration - - dataSources []interface { + dataSources []interface { Payload() transport.Payload - Size() int } - ticker *internal.Ticker + flushTicker *internal.Ticker + heartbeatTicker *internal.Ticker + + // flushMapper is the transformer to use for the next flush on the gather payloads on this tick + flushMapper mapper.Mapper + flushMapperMu sync.Mutex - // flushTransformer is the transformer to use for the next flush - flushTransformer internal.Transformer - flushTransformerMu sync.Mutex + // payloadQueue is used when we cannot flush previously built payloads + payloadQueue *internal.RingQueue[transport.Payload] } func (c *client) MarkIntegrationAsLoaded(integration Integration) { @@ -144,7 +153,7 @@ func (c *client) Config() ClientConfig { return c.clientConfig } -func (c *client) Flush() (int, error) { +func (c *client) Flush() { var payloads []transport.Payload for _, ds := range c.dataSources { if payload := ds.Payload(); payload != nil { @@ -152,69 +161,81 @@ func (c *client) Flush() (int, error) { } } - return c.flush(payloads) + _, _ = c.flush(payloads) } -// flush sends all the data sources to the writer by let them flow through the given wrapper function. -// The wrapper function is used to transform the payloads before sending them to the writer. +// flush sends all the data sources to the writer by let them flow through the given transformer function. +// The transformer function is used to transform the payloads before sending them to the writer. 
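+//
+// Informal sketch of the flush pipeline introduced in this patch, summarising the
+// code below:
+//
+//	data sources -> flushMapper.Transform -> payloadQueue.Enqueue
+//	payloadQueue.GetBuffer -> writer.Flush, one payload at a time
+//	  - a fatal writer error re-enqueues the remaining payloads for the next tick
+//	  - an oversized payload calls flushTicker.IncreaseSpeed() to flush sooner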
func (c *client) flush(payloads []transport.Payload) (int, error) { - if len(payloads) == 0 { - return 0, nil - } - - // Always add the Heartbeat to the payloads - payloads = append(payloads, transport.AppHeartbeat{}) - // Transform the payloads { - c.flushTransformerMu.Lock() - payloads, c.flushTransformer = c.flushTransformer.Transform(payloads) - c.flushTransformerMu.Unlock() + c.flushMapperMu.Lock() + payloads, c.flushMapper = c.flushMapper.Transform(payloads) + c.flushMapperMu.Unlock() } + c.payloadQueue.Enqueue(payloads...) + payloads = c.payloadQueue.GetBuffer() + defer c.payloadQueue.ReleaseBuffer(payloads) + var ( - nbBytes int - err error + nbBytes int + nonFatalErros []error ) - for _, payload := range payloads { - nbBytesOfPayload, payloadErr := c.writer.Flush(payload) + for i, payload := range payloads { + if payload == nil { + continue + } + + nbBytesOfPayload, err := c.writer.Flush(payload) if nbBytes > 0 { - log.Debug("non-fatal error while flushing telemetry data: %v", err) + nonFatalErros = append(nonFatalErros, err) err = nil } + if err != nil { + // We stop flushing when we encounter a fatal error, put the payloads in the + log.Error("error while flushing telemetry data: %v", err) + c.payloadQueue.Enqueue(payloads[i:]...) + return nbBytes, err + } + + if nbBytesOfPayload > c.clientConfig.EarlyFlushPayloadSize { + // We increase the speed of the flushTicker to try to flush the remaining payloads faster as we are at risk of sending too large payloads to the backend + c.flushTicker.IncreaseSpeed() + } + nbBytes += nbBytesOfPayload - err = errors.Join(err, payloadErr) } - return nbBytes, err + if len(nonFatalErros) > 0 { + log.Debug("non-fatal error while flushing telemetry data: %v", errors.Join(nonFatalErros...)) + } + + return nbBytes, nil } func (c *client) appStart() { - c.flushTransformerMu.Lock() - defer c.flushTransformerMu.Unlock() - c.flushTransformer = internal.AppStartedTransformer + c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() + + // Wrap the current flushMapper with the AppStartedMapper so we can add the app-started event to the payloads using available payloads at the time of the call one minute later + c.flushMapper = mapper.NewAppStartedMapper(c.flushMapper) } func (c *client) appStop() { - c.flushTransformerMu.Lock() - c.flushTransformer = internal.AppClosingTransformer - c.flushTransformerMu.Unlock() + c.flushMapperMu.Lock() + c.flushMapper = mapper.NewAppClosingMapper(c.flushMapper) + c.flushMapperMu.Unlock() + + // Flush locks the flushMapperMu mutex, so we need to call it outside the lock c.Flush() c.Close() } -func (c *client) size() int { - size := 0 - for _, ds := range c.dataSources { - size += ds.Size() - } - return size -} - func (c *client) Close() error { - c.ticker.Stop() + c.flushTicker.Stop() return nil } diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go index 71d8b5a66b..c8a52b4824 100644 --- a/internal/newtelemetry/config.go +++ b/internal/newtelemetry/config.go @@ -85,6 +85,7 @@ const ( defaultEarlyFlushPayloadSize = 2 * 1024 * 1024 // 2MB + // maxPayloadSize is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. 
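+ // defaultEarlyFlushPayloadSize above is deliberately much smaller than this
+ // limit, so the client speeds up its flush ticker well before a payload can
+ // grow anywhere near 5MB.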
maxPayloadSize = 5 * 1024 * 1024 // 5MB ) @@ -153,6 +154,10 @@ func defaultConfig(config ClientConfig) ClientConfig { config.LogsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true) } + if config.EarlyFlushPayloadSize == 0 { + config.EarlyFlushPayloadSize = defaultEarlyFlushPayloadSize + } + return config } diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index 722530c46e..dbf88e5393 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -6,7 +6,6 @@ package newtelemetry import ( - "reflect" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" @@ -16,7 +15,6 @@ import ( type configuration struct { mu sync.Mutex config map[string]transport.ConfKeyValue - size int seqID uint64 } @@ -28,37 +26,11 @@ func (c *configuration) Add(key string, value any, origin types.Origin) { c.config = make(map[string]transport.ConfKeyValue) } - if kv, ok := c.config[key]; ok { - c.size -= len(kv.Name) + guessSize(kv.Value) - } - c.config[key] = transport.ConfKeyValue{ Name: key, Value: value, Origin: origin, } - - c.size += len(key) + guessSize(value) -} - -// guessSize returns a simple guess of the value's size in bytes. -// 99% of config values are strings so a simple guess is enough. -// All non-primitive types will go through reflection at encoding time anyway -func guessSize(value any) int { - switch value.(type) { - case string: - return len(value.(string)) - case int64, uint64, int, uint, float64, uintptr: - return 8 - case int32, uint32, float32: - return 4 - case int16, uint16: - return 2 - case bool, int8, uint8: - return 1 - } - - return int(reflect.ValueOf(value).Type().Size()) } func (c *configuration) Payload() transport.Payload { @@ -77,14 +49,7 @@ func (c *configuration) Payload() transport.Payload { c.seqID++ delete(c.config, conf.Name) } - c.size = 0 return transport.AppClientConfigurationChange{ Configuration: configs, } } - -func (c *configuration) Size() int { - c.mu.Lock() - defer c.mu.Unlock() - return c.size -} diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go index 7314403ea1..e46bc1e4c9 100644 --- a/internal/newtelemetry/integration.go +++ b/internal/newtelemetry/integration.go @@ -14,14 +14,12 @@ import ( type integrations struct { mu sync.Mutex integrations []Integration - size int } func (i *integrations) Add(integration Integration) { i.mu.Lock() defer i.mu.Unlock() i.integrations = append(i.integrations, integration) - i.size += len(integration.Name) + len(integration.Version) + len(integration.Error) } func (i *integrations) Payload() transport.Payload { @@ -41,14 +39,7 @@ func (i *integrations) Payload() transport.Payload { } } i.integrations = nil - i.size = 0 return transport.AppIntegrationChange{ Integrations: integrations, } } - -func (i *integrations) Size() int { - i.mu.Lock() - defer i.mu.Unlock() - return i.size -} diff --git a/internal/newtelemetry/internal/mapper/app_closing.go b/internal/newtelemetry/internal/mapper/app_closing.go new file mode 100644 index 0000000000..b6ce349453 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/app_closing.go @@ -0,0 +1,22 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package mapper + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func NewAppClosingMapper(underlying Mapper) Mapper { + return &appClosing{wrapper{underlying}} +} + +type appClosing struct { + wrapper +} + +func (t *appClosing) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + return t.wrapper.Transform(append(payloads, transport.AppClosing{})) +} diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go new file mode 100644 index 0000000000..cbdb257ff0 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/app_started.go @@ -0,0 +1,46 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package mapper + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type appStarted struct { + wrapper +} + +func NewAppStartedMapper(underlying Mapper) Mapper { + return &appStarted{wrapper{underlying}} +} + +func (t *appStarted) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + appStarted := &transport.AppStarted{ + InstallSignature: transport.InstallSignature{ + InstallID: globalconfig.InstrumentationInstallID(), + InstallType: globalconfig.InstrumentationInstallType(), + InstallTime: globalconfig.InstrumentationInstallTime(), + }, + } + + payloadLefts := make([]transport.Payload, 0, len(payloads)) + for _, payload := range payloads { + switch payload.(type) { + case transport.AppClientConfigurationChange: + appStarted.Configuration = payload.(transport.AppClientConfigurationChange).Configuration + case transport.AppProductChange: + appStarted.Products = payload.(transport.AppProductChange).Products + default: + payloadLefts = append(payloadLefts, payload) + } + } + + // Following the documentation, an app-started payload cannot be put in a message-batch one so we don't forward it to the next transformation, + // and we also need to put the app-started payload at the beginning of the slice + mappedPayloads, nextMapper := t.wrapper.Transform(payloadLefts) + return append([]transport.Payload{appStarted}, mappedPayloads...), nextMapper +} diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go new file mode 100644 index 0000000000..001b2a1bf5 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/default.go @@ -0,0 +1,81 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package mapper + +import ( + "time" + + "golang.org/x/time/rate" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// NewDefaultMapper returns a Mapper that transforms payloads into a MessageBatch and adds a heartbeat message. +// The heartbeat message is added every heartbeatInterval. 
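+//
+// A hedged sketch of a single flush tick (the payload value is illustrative):
+//
+//	m := NewDefaultMapper(time.Minute)
+//	out, next := m.Transform([]transport.Payload{transport.AppIntegrationChange{}})
+//	// out is the batched (and possibly heartbeat-enriched) set of payloads to
+//	// hand to the writer; next is the mapper to reuse on the following tick.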
+func NewDefaultMapper(heartbeatInterval time.Duration) Mapper { + return &defaultMapper{ + heartbeatEnricher: heartbeatEnricher{ + rateLimiter: rate.NewLimiter(rate.Every(heartbeatInterval), 1), + }, + } +} + +type defaultMapper struct { + heartbeatEnricher + messageBatchReducer +} + +func (t *defaultMapper) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + payloads, _ = t.heartbeatEnricher.Transform(payloads) + payloads, _ = t.messageBatchReducer.Transform(payloads) + return payloads, t +} + +type messageBatchReducer struct{} + +func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + messages := make([]transport.Message, len(payloads)) + for _, payload := range payloads { + messages = append(messages, transport.Message{ + Payload: payload, + RequestType: payload.RequestType(), + }) + } + + return []transport.Payload{transport.MessageBatch{Payload: messages}}, t +} + +type heartbeatEnricher struct { + rateLimiter *rate.Limiter +} + +func (t *heartbeatEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + if !t.rateLimiter.Allow() { + return payloads, t + } + + extendedHeartbeat := transport.AppExtendedHeartbeat{} + payloadLefts := make([]transport.Payload, 0, len(payloads)) + for _, payload := range payloads { + switch payload.(type) { + case transport.AppDependenciesLoaded: + extendedHeartbeat.Dependencies = payload.(transport.AppDependenciesLoaded).Dependencies + case transport.AppIntegrationChange: + extendedHeartbeat.Integrations = payload.(transport.AppIntegrationChange).Integrations + case transport.AppClientConfigurationChange: + extendedHeartbeat.Configuration = payload.(transport.AppClientConfigurationChange).Configuration + default: + payloadLefts = append(payloadLefts, payload) + } + } + + if len(payloadLefts) == len(payloads) { + // No Payloads were consumed by the extended heartbeat, we can add a regular heartbeat + return append(payloads, transport.AppHeartbeat{}), t + } + + return append(payloadLefts, extendedHeartbeat), t +} diff --git a/internal/newtelemetry/internal/mapper/mapper.go b/internal/newtelemetry/internal/mapper/mapper.go new file mode 100644 index 0000000000..84a376fee6 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/mapper.go @@ -0,0 +1,17 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package mapper + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +// Mapper is an interface for transforming payloads to comply with different types of lifecycle events in the application. +type Mapper interface { + // Transform transforms the given payloads and returns the transformed payloads and the Mapper to use for the next + // transformation at the next flush a minute later + Transform([]transport.Payload) ([]transport.Payload, Mapper) +} diff --git a/internal/newtelemetry/internal/mapper/wrapper.go b/internal/newtelemetry/internal/mapper/wrapper.go new file mode 100644 index 0000000000..c2c52901a3 --- /dev/null +++ b/internal/newtelemetry/internal/mapper/wrapper.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package mapper + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type wrapper struct { + underlying Mapper +} + +func (w wrapper) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + return w.underlying.Transform(payloads) +} diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 851d0f0ebe..e472944878 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -34,7 +34,16 @@ func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { } // Enqueue adds a value to the buffer. -func (rb *RingQueue[T]) Enqueue(val T) bool { +func (rb *RingQueue[T]) Enqueue(vals ...T) bool { + for _, val := range vals { + if !rb.enqueueLocked(val) { + return false + } + } + return true +} + +func (rb *RingQueue[T]) enqueueLocked(val T) bool { rb.mu.Lock() defer rb.mu.Unlock() @@ -82,7 +91,7 @@ func (rb *RingQueue[T]) GetBuffer() []T { // ReleaseBuffer returns the buffer to the pool. func (rb *RingQueue[T]) ReleaseBuffer(buf []T) { - rb.pool.Put(buf) + rb.pool.Put(buf[:cap(buf)]) // Make sure nobody reduced the length of the buffer } // IsEmpty returns true if the buffer is empty. diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index 67fe9bd4da..192673867e 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -10,9 +10,7 @@ import ( "time" ) -type flusher interface { - Flush() (int, error) -} +type TickFunc func() type Ticker struct { *time.Ticker @@ -23,10 +21,10 @@ type Ticker struct { maxInterval time.Duration minInterval time.Duration - flusher flusher + tickFunc TickFunc } -func NewTicker(flusher flusher, minInterval, maxInterval time.Duration) *Ticker { +func NewTicker(tickFunc TickFunc, minInterval, maxInterval time.Duration) *Ticker { ticker := &Ticker{ Ticker: time.NewTicker(maxInterval), tickSpeed: maxInterval, @@ -34,26 +32,30 @@ func NewTicker(flusher flusher, minInterval, maxInterval time.Duration) *Ticker maxInterval: maxInterval, minInterval: minInterval, - flusher: flusher, + tickFunc: tickFunc, } go func() { for range ticker.C { - _, err := flusher.Flush() - if err != nil { - // Reset the interval to the maximum value - ticker.AdjustTickSpeed(maxInterval) - } + tickFunc() } }() return ticker } -func (t *Ticker) AdjustTickSpeed(interval time.Duration) { +func (t *Ticker) IncreaseSpeed() { + t.tickSpeedMu.Lock() + defer t.tickSpeedMu.Unlock() + + t.tickSpeed = max(t.tickSpeed/2, t.minInterval) + t.Reset(t.tickSpeed) +} + +func (t *Ticker) DecreaseSpeed() { t.tickSpeedMu.Lock() defer t.tickSpeedMu.Unlock() - t.tickSpeed = interval - t.Reset(interval) + t.tickSpeed = min(t.tickSpeed*2, t.maxInterval) + t.Reset(t.tickSpeed) } diff --git a/internal/newtelemetry/internal/transformers.go b/internal/newtelemetry/internal/transformers.go deleted file mode 100644 index 84c91d0af0..0000000000 --- a/internal/newtelemetry/internal/transformers.go +++ /dev/null @@ -1,74 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2025 Datadog, Inc. 
- -package internal - -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" -) - -// Transformer is an interface for transforming payloads to comply with different types of lifecycle events in the application. -type Transformer interface { - // Transform transforms the given payloads and returns the transformed payloads and the Transformer to use for the next - // transformation. - Transform([]transport.Payload) ([]transport.Payload, Transformer) -} - -type appStartedTransformer struct{} - -var AppStartedTransformer Transformer = &appStartedTransformer{} - -func (t *appStartedTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { - appStarted := transport.AppStarted{ - InstallSignature: transport.InstallSignature{ - InstallID: globalconfig.InstrumentationInstallID(), - InstallType: globalconfig.InstrumentationInstallType(), - InstallTime: globalconfig.InstrumentationInstallTime(), - }, - } - - payloadLefts := make([]transport.Message, 0, len(payloads)/2) - for _, payload := range payloads { - switch payload.(type) { - case transport.AppClientConfigurationChange: - appStarted.Configuration = payload.(transport.AppClientConfigurationChange).Configuration - case transport.AppProductChange: - appStarted.Products = payload.(transport.AppProductChange).Products - default: - payloadLefts = append(payloadLefts, transport.Message{Payload: payload, RequestType: payload.RequestType()}) - } - } - - // Following the documentation, an app-started payload cannot be put in a message-batch one. - return []transport.Payload{ - appStarted, - transport.MessageBatch{Payload: payloadLefts}, - }, MessageBatchTransformer -} - -type messageBatchTransformer struct{} - -var MessageBatchTransformer Transformer = &messageBatchTransformer{} - -func (t *messageBatchTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { - messages := make([]transport.Message, len(payloads)) - for _, payload := range payloads { - messages = append(messages, transport.Message{ - Payload: payload, - RequestType: payload.RequestType(), - }) - } - - return []transport.Payload{transport.MessageBatch{Payload: messages}}, MessageBatchTransformer -} - -type appClosingTransformer struct{} - -var AppClosingTransformer Transformer = &appClosingTransformer{} - -func (t *appClosingTransformer) Transform(payloads []transport.Payload) ([]transport.Payload, Transformer) { - return MessageBatchTransformer.Transform(append(payloads, transport.AppClosing{})) -} diff --git a/internal/newtelemetry/product.go b/internal/newtelemetry/product.go index b0d7b7eeb5..9aa215f4d5 100644 --- a/internal/newtelemetry/product.go +++ b/internal/newtelemetry/product.go @@ -15,7 +15,6 @@ import ( type products struct { mu sync.Mutex products map[types.Namespace]transport.Product - size int } func (p *products) Add(namespace types.Namespace, enabled bool, err error) { @@ -35,12 +34,7 @@ func (p *products) Add(namespace types.Namespace, enabled bool, err error) { } } - if product, ok := p.products[namespace]; ok { - p.size -= len(namespace) + len(product.Error.Message) - } - p.products[namespace] = product - p.size += len(namespace) + len(product.Error.Message) } func (p *products) Payload() transport.Payload { @@ -54,12 +48,5 @@ func (p *products) Payload() transport.Payload { Products: p.products, } p.products = nil - p.size = 0 return res } - -func (p *products) Size() int { - p.mu.Lock() - defer p.mu.Unlock() - 
return p.size -} From 9eafa97a880de9fbea4ae64b80a9dce5aa44ccc9 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 20 Jan 2025 17:15:59 +0100 Subject: [PATCH 14/61] start testing the basic features of the client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 12 +- internal/newtelemetry/client_test.go | 125 ++++++++++++++++++ internal/newtelemetry/config.go | 2 +- .../newtelemetry/internal/mapper/default.go | 4 + 4 files changed, 135 insertions(+), 8 deletions(-) create mode 100644 internal/newtelemetry/client_test.go diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index d191afdef4..10a41fe62a 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -30,18 +30,16 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error return nil, errors.New("version must not be empty") } + return newClient(internal.TracerConfig{Service: service, Env: env, Version: version}, config) +} + +func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client, error) { config = defaultConfig(config) if err := config.validateConfig(); err != nil { return nil, err } - tracerConfig := internal.TracerConfig{ - Service: service, - Env: env, - Version: version, - } - - writerConfig, err := NewWriterConfig(config, tracerConfig) + writerConfig, err := newWriterConfig(config, tracerConfig) if err != nil { return nil, err } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go new file mode 100644 index 0000000000..c30afe4b7a --- /dev/null +++ b/internal/newtelemetry/client_test.go @@ -0,0 +1,125 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package newtelemetry + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func TestNewClient(t *testing.T) { + for _, test := range []struct { + name string + tracerConfig internal.TracerConfig + clientConfig ClientConfig + newErr string + }{ + { + name: "empty service", + tracerConfig: internal.TracerConfig{ + Service: "", + Env: "test-env", + Version: "1.0.0", + }, + newErr: "service name must not be empty", + }, + { + name: "empty environment", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + Env: "", + Version: "1.0.0", + }, + newErr: "environment name must not be empty", + }, + { + name: "empty version", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "", + }, + newErr: "version must not be empty", + }, + } { + t.Run(test.name, func(t *testing.T) { + c, err := NewClient(test.tracerConfig.Service, test.tracerConfig.Env, test.tracerConfig.Version, test.clientConfig) + if err == nil { + defer c.Close() + } + + if test.newErr == "" { + require.NoError(t, err) + } else { + require.ErrorContains(t, err, test.newErr) + } + }) + } +} + +type testWriter struct { + flush func(transport.Payload) +} + +func (w *testWriter) Flush(payloads transport.Payload) (int, error) { + w.flush(payloads) + return 1, nil +} + +func TestClient(t *testing.T) { + tracerConfig := internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + } + for _, test := range []struct { + name string + clientConfig ClientConfig + when func(c *client) + expect func(*testing.T, transport.Payload) + }{ + { + name: "heartbeat", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + expect: func(t *testing.T, payload transport.Payload) { + assert.Equal(t, payload.RequestType(), transport.RequestTypeAppHeartbeat) + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + test.clientConfig.AgentURL = "http://localhost:8126" + c, err := newClient(tracerConfig, test.clientConfig) + require.NoError(t, err) + defer c.Close() + + var writerFlushCalled bool + + c.writer = &testWriter{ + flush: func(payload transport.Payload) { + writerFlushCalled = true + if test.expect != nil { + test.expect(t, payload) + } + }, + } + + if test.when != nil { + test.when(c) + } + c.Flush() + require.Truef(t, writerFlushCalled, "expected writer.Flush() to be called") + }) + } +} diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/config.go index c8a52b4824..bf74b40e18 100644 --- a/internal/newtelemetry/config.go +++ b/internal/newtelemetry/config.go @@ -161,7 +161,7 @@ func defaultConfig(config ClientConfig) ClientConfig { return config } -func NewWriterConfig(config ClientConfig, tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { +func newWriterConfig(config ClientConfig, tracerConfig internal.TracerConfig) (internal.WriterConfig, error) { endpoints := make([]*http.Request, 0, 2) if config.AgentURL != "" { baseURL, err := url.Parse(config.AgentURL) diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go index 001b2a1bf5..f63d27c8ee 100644 --- a/internal/newtelemetry/internal/mapper/default.go +++ b/internal/newtelemetry/internal/mapper/default.go @@ -37,6 +37,10 @@ func (t *defaultMapper) Transform(payloads 
[]transport.Payload) ([]transport.Pay type messageBatchReducer struct{} func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + if len(payloads) <= 1 { + return payloads, t + } + messages := make([]transport.Message, len(payloads)) for _, payload := range payloads { messages = append(messages, transport.Message{ From 1baad87771046452c213e85aebcfd192db4c163d Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 22 Jan 2025 16:38:58 +0100 Subject: [PATCH 15/61] add more tests Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 5 +- internal/newtelemetry/client.go | 16 +- .../{config.go => client_config.go} | 0 internal/newtelemetry/client_test.go | 263 +++++++++++++++++- internal/newtelemetry/globalclient.go | 11 +- internal/newtelemetry/integration.go | 2 +- .../internal/mapper/app_closing.go | 7 +- .../internal/mapper/app_started.go | 17 +- .../newtelemetry/internal/mapper/default.go | 14 +- internal/newtelemetry/internal/ringbuffer.go | 18 +- 10 files changed, 310 insertions(+), 43 deletions(-) rename internal/newtelemetry/{config.go => client_config.go} (100%) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 8d0895eb7f..e6631c3330 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -50,7 +50,8 @@ type Integration struct { Name string // Version is the version of the integration/dependency that is being loaded. Version string - // Error is the error that occurred while loading the integration. + // Error is the error that occurred while loading the integration. If this field is specified, the integration is + // considered to be having been forcefully disabled because of the error. Error string } @@ -101,6 +102,6 @@ type Client interface { // appStart sends the telemetry necessary to signal that the app is starting. appStart() - // appStop sends the telemetry necessary to signal that the app is stopping and calls Close() + // appStop sends the telemetry necessary to signal that the app is stopping. 
appStop() } diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 10a41fe62a..8977033ede 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -187,7 +187,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { } nbBytesOfPayload, err := c.writer.Flush(payload) - if nbBytes > 0 { + if nbBytes > 0 && err != nil { nonFatalErros = append(nonFatalErros, err) err = nil } @@ -208,7 +208,11 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { } if len(nonFatalErros) > 0 { - log.Debug("non-fatal error while flushing telemetry data: %v", errors.Join(nonFatalErros...)) + err := "error" + if len(nonFatalErros) > 1 { + err = "errors" + } + log.Debug("non-fatal %s while flushing telemetry data: %v", err, errors.Join(nonFatalErros...)) } return nbBytes, nil @@ -217,19 +221,13 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { func (c *client) appStart() { c.flushMapperMu.Lock() defer c.flushMapperMu.Unlock() - - // Wrap the current flushMapper with the AppStartedMapper so we can add the app-started event to the payloads using available payloads at the time of the call one minute later c.flushMapper = mapper.NewAppStartedMapper(c.flushMapper) } func (c *client) appStop() { c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() c.flushMapper = mapper.NewAppClosingMapper(c.flushMapper) - c.flushMapperMu.Unlock() - - // Flush locks the flushMapperMu mutex, so we need to call it outside the lock - c.Flush() - c.Close() } func (c *client) Close() error { diff --git a/internal/newtelemetry/config.go b/internal/newtelemetry/client_config.go similarity index 100% rename from internal/newtelemetry/config.go rename to internal/newtelemetry/client_config.go diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index c30afe4b7a..f03a5c7d4f 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -6,14 +6,17 @@ package newtelemetry import ( + "errors" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) func TestNewClient(t *testing.T) { @@ -75,7 +78,7 @@ func (w *testWriter) Flush(payloads transport.Payload) (int, error) { return 1, nil } -func TestClient(t *testing.T) { +func TestClientFlush(t *testing.T) { tracerConfig := internal.TracerConfig{ Service: "test-service", Env: "test-env", @@ -93,9 +96,267 @@ func TestClient(t *testing.T) { HeartbeatInterval: time.Nanosecond, }, expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppHeartbeat{}, payload) assert.Equal(t, payload.RequestType(), transport.RequestTypeAppHeartbeat) }, }, + { + name: "extended-heartbeat-config", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.AddAppConfig("key", "value", types.OriginDefault) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppExtendedHeartbeat{}, payload) + heartbeat := payload.(transport.AppExtendedHeartbeat) + assert.Len(t, heartbeat.Configuration, 1) + assert.Equal(t, heartbeat.Configuration[0].Name, "key") + assert.Equal(t, heartbeat.Configuration[0].Value, "value") + assert.Equal(t, 
heartbeat.Configuration[0].Origin, types.OriginDefault) + }, + }, + { + name: "extended-heartbeat-integrations", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppExtendedHeartbeat{}, payload) + heartbeat := payload.(transport.AppExtendedHeartbeat) + assert.Len(t, heartbeat.Integrations, 1) + assert.Equal(t, heartbeat.Integrations[0].Name, "test-integration") + assert.Equal(t, heartbeat.Integrations[0].Version, "1.0.0") + }, + }, + { + name: "configuration-default", + when: func(c *client) { + c.AddAppConfig("key", "value", types.OriginDefault) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + assert.Len(t, config.Configuration, 1) + assert.Equal(t, config.Configuration[0].Name, "key") + assert.Equal(t, config.Configuration[0].Value, "value") + assert.Equal(t, config.Configuration[0].Origin, types.OriginDefault) + }, + }, + { + name: "configuration-default", + when: func(c *client) { + c.AddAppConfig("key", "value", types.OriginDefault) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + assert.Len(t, config.Configuration, 1) + assert.Equal(t, config.Configuration[0].Name, "key") + assert.Equal(t, config.Configuration[0].Value, "value") + assert.Equal(t, config.Configuration[0].Origin, types.OriginDefault) + }, + }, + { + name: "product-start", + when: func(c *client) { + c.ProductStarted("test-product") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.True(t, productChange.Products[types.Namespace("test-product")].Enabled) + }, + }, + { + name: "product-start-error", + when: func(c *client) { + c.ProductStartError("test-product", errors.New("test-error")) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.False(t, productChange.Products[types.Namespace("test-product")].Enabled) + assert.Equal(t, "test-error", productChange.Products[types.Namespace("test-product")].Error.Message) + }, + }, + { + name: "product-stop", + when: func(c *client) { + c.ProductStopped("test-product") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppProductChange{}, payload) + productChange := payload.(transport.AppProductChange) + assert.Len(t, productChange.Products, 1) + assert.False(t, productChange.Products[types.Namespace("test-product")].Enabled) + }, + }, + { + name: "integration", + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppIntegrationChange{}, payload) + integrationChange := payload.(transport.AppIntegrationChange) + assert.Len(t, integrationChange.Integrations, 1) + assert.Equal(t, integrationChange.Integrations[0].Name, 
"test-integration") + assert.Equal(t, integrationChange.Integrations[0].Version, "1.0.0") + assert.True(t, integrationChange.Integrations[0].Enabled) + }, + }, + { + name: "integration-error", + when: func(c *client) { + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0", Error: "test-error"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppIntegrationChange{}, payload) + integrationChange := payload.(transport.AppIntegrationChange) + assert.Len(t, integrationChange.Integrations, 1) + assert.Equal(t, integrationChange.Integrations[0].Name, "test-integration") + assert.Equal(t, integrationChange.Integrations[0].Version, "1.0.0") + assert.False(t, integrationChange.Integrations[0].Enabled) + assert.Equal(t, integrationChange.Integrations[0].Error, "test-error") + }, + }, + { + name: "product+integration", + when: func(c *client) { + c.ProductStarted("test-product") + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + for _, payload := range batch.Payload { + switch p := payload.Payload.(type) { + case transport.AppProductChange: + assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) + assert.Len(t, p.Products, 1) + assert.True(t, p.Products[types.Namespace("test-product")].Enabled) + case transport.AppIntegrationChange: + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + assert.True(t, p.Integrations[0].Enabled) + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, + { + name: "app-started", + when: func(c *client) { + c.appStart() + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, appStart.InstallSignature.InstallID, globalconfig.InstrumentationInstallID()) + assert.Equal(t, appStart.InstallSignature.InstallType, globalconfig.InstrumentationInstallType()) + assert.Equal(t, appStart.InstallSignature.InstallTime, globalconfig.InstrumentationInstallTime()) + }, + }, + { + name: "app-started-with-product", + when: func(c *client) { + c.appStart() + c.ProductStarted("test-product") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, appStart.Products[types.Namespace("test-product")].Enabled, true) + }, + }, + { + name: "app-started-with-configuration", + when: func(c *client) { + c.appStart() + c.AddAppConfig("key", "value", types.OriginDefault) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + require.Len(t, appStart.Configuration, 1) + assert.Equal(t, appStart.Configuration[0].Name, "key") + assert.Equal(t, appStart.Configuration[0].Value, "value") + }, + }, + { + name: "app-started+integrations", + when: func(c *client) { + c.appStart() + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.MessageBatch{}, payload) + 
batch := payload.(transport.MessageBatch) + + // Check AppStarted is the first payload in MessageBatch + assert.IsType(t, transport.AppStarted{}, batch.Payload[0].Payload) + + for _, payload := range batch.Payload { + switch p := payload.Payload.(type) { + case transport.AppStarted: + assert.Equal(t, transport.RequestTypeAppStarted, payload.RequestType) + case transport.AppIntegrationChange: + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, + { + name: "app-started+heartbeat", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) { + c.appStart() + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + + // Check AppStarted is the first payload in MessageBatch + assert.IsType(t, transport.AppStarted{}, batch.Payload[0].Payload) + + for _, payload := range batch.Payload { + switch p := payload.Payload.(type) { + case transport.AppStarted: + assert.Equal(t, transport.RequestTypeAppStarted, payload.RequestType) + case transport.AppHeartbeat: + assert.Equal(t, transport.RequestTypeAppHeartbeat, payload.RequestType) + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, + { + name: "app-stopped", + when: func(c *client) { + c.appStop() + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppClosing{}, payload) + }, + }, } { t.Run(test.name, func(t *testing.T) { t.Parallel() diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 7ce231fc2f..ce682cb701 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -17,14 +17,13 @@ var ( ) // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). -func StartApp(client Client) error { - if Disabled() { - return nil +func StartApp(client Client) { + if Disabled() || globalClient.Load() != nil { + return } client.appStart() SwapClient(client) - return nil } // SwapClient swaps the global client with the given client and Flush the old (*client). @@ -40,12 +39,14 @@ func SwapClient(client Client) { // StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). 
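//
// A hedged lifecycle sketch (service, env and version strings are illustrative;
// endpoint configuration and error handling are elided):
//
//	c, _ := NewClient("my-service", "prod", "1.2.3", ClientConfig{ /* agent or agentless endpoint */ })
//	StartApp(c)
//	// ... application runs, products and metrics are reported ...
//	StopApp()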
func StopApp() { - if Disabled() { + if Disabled() || globalClient.Load() == nil { return } if client := globalClient.Swap(nil); client != nil && *client != nil { (*client).appStop() + (*client).Flush() + (*client).Close() } } diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go index e46bc1e4c9..c62fa6ec1d 100644 --- a/internal/newtelemetry/integration.go +++ b/internal/newtelemetry/integration.go @@ -34,7 +34,7 @@ func (i *integrations) Payload() transport.Payload { integrations[idx] = transport.Integration{ Name: integration.Name, Version: integration.Version, - Enabled: true, + Enabled: integration.Error == "", // no error means the integration was enabled successfully Error: integration.Error, } } diff --git a/internal/newtelemetry/internal/mapper/app_closing.go b/internal/newtelemetry/internal/mapper/app_closing.go index b6ce349453..c3d1fd6b14 100644 --- a/internal/newtelemetry/internal/mapper/app_closing.go +++ b/internal/newtelemetry/internal/mapper/app_closing.go @@ -9,14 +9,15 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) +// NewAppClosingMapper returns a new Mapper that appends an AppClosing payload to the given payloads and calls the underlying Mapper with it. func NewAppClosingMapper(underlying Mapper) Mapper { - return &appClosing{wrapper{underlying}} + return &appClosingEnricher{wrapper{underlying}} } -type appClosing struct { +type appClosingEnricher struct { wrapper } -func (t *appClosing) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { +func (t *appClosingEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { return t.wrapper.Transform(append(payloads, transport.AppClosing{})) } diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go index cbdb257ff0..169bfb3b6c 100644 --- a/internal/newtelemetry/internal/mapper/app_started.go +++ b/internal/newtelemetry/internal/mapper/app_started.go @@ -10,16 +10,19 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) -type appStarted struct { +type appStartedReducer struct { wrapper } +// NewAppStartedMapper returns a new Mapper that adds an AppStarted payload to the beginning of all payloads +// and pass it down to irs underlying mapper. 
+// The AppStarted payload ingest the [transport.AppClientConfigurationChange] and [transport.AppProductChange] payloads func NewAppStartedMapper(underlying Mapper) Mapper { - return &appStarted{wrapper{underlying}} + return &appStartedReducer{wrapper{underlying}} } -func (t *appStarted) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { - appStarted := &transport.AppStarted{ +func (t *appStartedReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { + appStarted := transport.AppStarted{ InstallSignature: transport.InstallSignature{ InstallID: globalconfig.InstrumentationInstallID(), InstallType: globalconfig.InstrumentationInstallType(), @@ -39,8 +42,6 @@ func (t *appStarted) Transform(payloads []transport.Payload) ([]transport.Payloa } } - // Following the documentation, an app-started payload cannot be put in a message-batch one so we don't forward it to the next transformation, - // and we also need to put the app-started payload at the beginning of the slice - mappedPayloads, nextMapper := t.wrapper.Transform(payloadLefts) - return append([]transport.Payload{appStarted}, mappedPayloads...), nextMapper + // The app-started event should be the first event in the payload + return t.wrapper.Transform(append([]transport.Payload{appStarted}, payloadLefts...)) } diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go index f63d27c8ee..f069880369 100644 --- a/internal/newtelemetry/internal/mapper/default.go +++ b/internal/newtelemetry/internal/mapper/default.go @@ -16,11 +16,15 @@ import ( // NewDefaultMapper returns a Mapper that transforms payloads into a MessageBatch and adds a heartbeat message. // The heartbeat message is added every heartbeatInterval. func NewDefaultMapper(heartbeatInterval time.Duration) Mapper { - return &defaultMapper{ + mapper := &defaultMapper{ heartbeatEnricher: heartbeatEnricher{ rateLimiter: rate.NewLimiter(rate.Every(heartbeatInterval), 1), }, } + + // The rate limiter is initialized with a token, but we want the first heartbeat to be sent in one minute, so we consume the token + mapper.heartbeatEnricher.rateLimiter.Allow() + return mapper } type defaultMapper struct { @@ -42,11 +46,11 @@ func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transpo } messages := make([]transport.Message, len(payloads)) - for _, payload := range payloads { - messages = append(messages, transport.Message{ - Payload: payload, + for i, payload := range payloads { + messages[i] = transport.Message{ RequestType: payload.RequestType(), - }) + Payload: payload, + } } return []transport.Payload{transport.MessageBatch{Payload: messages}}, t diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index e472944878..8c0d3800a8 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -9,6 +9,7 @@ import ( "sync" ) +// RingQueue is a thread-safe ring buffer can be used to store a fixed number of elements and overwrite old values when full. type RingQueue[T any] struct { // buffer is the slice that contains the data. buffer []T @@ -23,6 +24,7 @@ type RingQueue[T any] struct { maxBufferSize int } +// NewRingQueue creates a new RingQueue with a minimum size and a maximum size. 
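+//
+// A hedged usage sketch (sizes and values are illustrative; GetBuffer is assumed
+// to hand back the currently buffered elements as a pooled slice):
+//
+//	q := NewRingQueue[int](4, 8)
+//	q.Enqueue(1, 2, 3)
+//	buf := q.GetBuffer()   // drain what has been queued so far
+//	q.ReleaseBuffer(buf)   // hand the slice back to the pool once processed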
func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { return &RingQueue[T]{ buffer: make([]T, minSize), @@ -33,24 +35,22 @@ func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { } } -// Enqueue adds a value to the buffer. +// Enqueue adds one or multiple values to the buffer. returns false if the buffer is full. func (rb *RingQueue[T]) Enqueue(vals ...T) bool { + rb.mu.Lock() + defer rb.mu.Unlock() + spaceLeft := true for _, val := range vals { - if !rb.enqueueLocked(val) { - return false - } + spaceLeft = rb.enqueueLocked(val) } - return true + return spaceLeft } func (rb *RingQueue[T]) enqueueLocked(val T) bool { - rb.mu.Lock() - defer rb.mu.Unlock() - rb.buffer[rb.tail] = val rb.tail = (rb.tail + 1) % len(rb.buffer) - if rb.tail == rb.head && len(rb.buffer) == rb.maxBufferSize { // We loose one element + if rb.tail == rb.head && len(rb.buffer) == rb.maxBufferSize { // We lost one element rb.head = (rb.head + 1) % len(rb.buffer) return false } From f7e7ff7fe458110083965757b0ef8fa000067734 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 22 Jan 2025 18:33:21 +0100 Subject: [PATCH 16/61] add end 2 end tests and body unmarshaling for testing purposes Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_test.go | 144 ++++++++++++++++++ .../newtelemetry/internal/transport/body.go | 106 +++++++++++++ 2 files changed, 250 insertions(+) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index f03a5c7d4f..7335d07803 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -6,7 +6,10 @@ package newtelemetry import ( + "encoding/json" "errors" + "net/http" + "runtime" "testing" "time" @@ -17,6 +20,8 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" + "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" + "gopkg.in/DataDog/dd-trace-go.v1/internal/version" ) func TestNewClient(t *testing.T) { @@ -384,3 +389,142 @@ func TestClientFlush(t *testing.T) { }) } } + +type testRoundTripper struct { + roundTrip func(*http.Request) (*http.Response, error) +} + +func (t *testRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + return t.roundTrip(req) +} + +func TestClientEnd2End(t *testing.T) { + tracerConfig := internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + } + clientConfig := ClientConfig{ + AgentURL: "http://localhost:8126", + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + }, + Debug: true, + } + + parseRequest := func(t *testing.T, request *http.Request) transport.Body { + assert.Equal(t, "v2", request.Header.Get("DD-Telemetry-API-Version")) + assert.Equal(t, "application/json", request.Header.Get("Content-Type")) + assert.Equal(t, "go", request.Header.Get("DD-Client-Library-Language")) + assert.Equal(t, "test-env", request.Header.Get("DD-Agent-Env")) + assert.Equal(t, version.Tag, request.Header.Get("DD-Client-Library-Version")) + assert.Equal(t, globalconfig.InstrumentationInstallID(), request.Header.Get("DD-Agent-Install-Id")) + assert.Equal(t, globalconfig.InstrumentationInstallType(), request.Header.Get("DD-Agent-Install-Type")) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), request.Header.Get("DD-Agent-Install-Time")) + assert.Equal(t, "true", request.Header.Get("DD-Telemetry-Debug-Enabled")) + + assert.NotEmpty(t, 
request.Header.Get("DD-Agent-Hostname")) + + var body transport.Body + require.NoError(t, json.NewDecoder(request.Body).Decode(&body)) + + assert.Equal(t, "test-service", body.Application.ServiceName) + assert.Equal(t, "test-env", body.Application.Env) + assert.Equal(t, "1.0.0", body.Application.ServiceVersion) + assert.Equal(t, "go", body.Application.LanguageName) + assert.Equal(t, runtime.Version(), body.Application.LanguageVersion) + + assert.NotEmpty(t, body.Host.Hostname) + assert.Equal(t, osinfo.OSName(), body.Host.OS) + assert.Equal(t, osinfo.OSVersion(), body.Host.OSVersion) + assert.Equal(t, osinfo.Architecture(), body.Host.Architecture) + assert.Equal(t, osinfo.KernelName(), body.Host.KernelName) + assert.Equal(t, osinfo.KernelRelease(), body.Host.KernelRelease) + assert.Equal(t, osinfo.KernelVersion(), body.Host.KernelVersion) + + assert.Equal(t, true, body.Debug) + assert.Equal(t, "v2", body.APIVersion) + assert.NotZero(t, body.TracerTime) + assert.EqualValues(t, 1, body.SeqID) + assert.Equal(t, globalconfig.RuntimeID(), body.RuntimeID) + + return body + } + + for _, test := range []struct { + name string + when func(*client) + expect func(*testing.T, *http.Request) (*http.Response, error) + }{ + { + name: "app-start", + when: func(c *client) { + c.appStart() + }, + expect: func(t *testing.T, request *http.Request) (*http.Response, error) { + assert.EqualValues(t, transport.RequestTypeAppStarted, request.Header.Get("DD-Telemetry-Request-Type")) + body := parseRequest(t, request) + assert.Equal(t, transport.RequestTypeAppStarted, body.RequestType) + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + { + name: "app-stop", + when: func(c *client) { + c.appStop() + }, + expect: func(t *testing.T, request *http.Request) (*http.Response, error) { + assert.EqualValues(t, transport.RequestTypeAppClosing, request.Header.Get("DD-Telemetry-Request-Type")) + body := parseRequest(t, request) + assert.Equal(t, transport.RequestTypeAppClosing, body.RequestType) + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + { + name: "app-start+app-stop", + when: func(c *client) { + c.appStart() + c.appStop() + }, + expect: func(t *testing.T, request *http.Request) (*http.Response, error) { + assert.EqualValues(t, transport.RequestTypeMessageBatch, request.Header.Get("DD-Telemetry-Request-Type")) + body := parseRequest(t, request) + assert.Equal(t, transport.RequestTypeMessageBatch, body.RequestType) + + payload := body.Payload.(transport.MessageBatch) + assert.Len(t, payload.Payload, 2) + assert.Equal(t, transport.RequestTypeAppStarted, payload.Payload[0].RequestType) + assert.Equal(t, transport.RequestTypeAppClosing, payload.Payload[1].RequestType) + + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + clientConfig.HTTPClient.Transport = &testRoundTripper{ + roundTrip: func(req *http.Request) (*http.Response, error) { + if test.expect != nil { + return test.expect(t, req) + } + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + } + + c, err := newClient(tracerConfig, clientConfig) + require.NoError(t, err) + defer c.Close() + + test.when(c) + c.Flush() + }) + } +} diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index 1015df61e5..b34f673a23 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -5,6 +5,11 @@ package transport +import 
( + "encoding/json" + "fmt" +) + // Application is identifying information about the app itself type Application struct { ServiceName string `json:"service_name"` @@ -40,3 +45,104 @@ type Body struct { Application Application `json:"application"` Host Host `json:"host"` } + +func (b *Body) UnmarshalJSON(bytes []byte) error { + var anyMap map[string]json.RawMessage + var err error + if err = json.Unmarshal(bytes, &anyMap); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["api_version"], &b.APIVersion); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["request_type"], &b.RequestType); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["tracer_time"], &b.TracerTime); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["runtime_id"], &b.RuntimeID); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["seq_id"], &b.SeqID); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["debug"], &b.Debug); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["application"], &b.Application); err != nil { + return err + } + + if err = json.Unmarshal(anyMap["host"], &b.Host); err != nil { + return err + } + + if b.RequestType == RequestTypeMessageBatch { + var messageBatch struct { + Payload []struct { + RequestType RequestType `json:"request_type"` + Payload json.RawMessage `json:"payload"` + } `json:"payload"` + } + + if err = json.Unmarshal(anyMap["payload"], &messageBatch); err != nil { + return err + } + + batch := make([]Message, len(messageBatch.Payload)) + for i, message := range messageBatch.Payload { + payload, err := unmarshalPayload(message.Payload, message.RequestType) + if err != nil { + return err + } + batch[i] = Message{RequestType: message.RequestType, Payload: payload} + } + b.Payload = MessageBatch{Payload: batch} + return nil + } + + b.Payload, err = unmarshalPayload(anyMap["payload"], b.RequestType) + return err +} + +func unmarshalPayload(bytes json.RawMessage, requestType RequestType) (Payload, error) { + var payload Payload + switch requestType { + case RequestTypeAppClientConfigurationChange: + payload = new(AppClientConfigurationChange) + case RequestTypeAppProductChange: + payload = new(AppProductChange) + case RequestTypeAppIntegrationsChange: + payload = new(AppIntegrationChange) + case RequestTypeAppHeartbeat: + payload = new(AppHeartbeat) + case RequestTypeAppStarted: + payload = new(AppStarted) + case RequestTypeAppClosing: + payload = new(AppClosing) + case RequestTypeAppExtendedHeartBeat: + payload = new(AppExtendedHeartbeat) + case RequestTypeAppDependenciesLoaded: + payload = new(AppDependenciesLoaded) + case RequestTypeDistributions: + payload = new(Distributions) + case RequestTypeGenerateMetrics: + payload = new(GenerateMetrics) + case RequestTypeLogs: + payload = new(Logs) + } + + if err := json.Unmarshal(bytes, payload); err != nil { + return nil, fmt.Errorf("failed to unmarshal payload: %v", err) + } + + return payload, nil +} From 2b2e13b1141ace5b49f322889f4bd1e5fabd8ada Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 23 Jan 2025 15:15:04 +0100 Subject: [PATCH 17/61] add actionQueue for calls because NewClient() is done Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 5 ++-- internal/newtelemetry/globalclient.go | 33 ++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 8977033ede..f9feee657f 100644 --- a/internal/newtelemetry/client.go 
+++ b/internal/newtelemetry/client.go @@ -85,10 +85,9 @@ type client struct { Payload() transport.Payload } - flushTicker *internal.Ticker - heartbeatTicker *internal.Ticker + flushTicker *internal.Ticker - // flushMapper is the transformer to use for the next flush on the gather payloads on this tick + // flushMapper is the transformer to use for the next flush on the gathered payloads on this tick flushMapper mapper.Mapper flushMapperMu sync.Mutex diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index ce682cb701..138a8df227 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -9,11 +9,15 @@ import ( "sync/atomic" globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) var ( globalClient atomic.Pointer[Client] + + // actionQueue contains all actions done on the global client done before StartApp() with an actual client object is called + actionQueue = internal.NewRingQueue[func(Client)](16, 512) ) // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). @@ -22,8 +26,15 @@ func StartApp(client Client) { return } - client.appStart() SwapClient(client) + + actions := actionQueue.GetBuffer() + defer actionQueue.ReleaseBuffer(actions) + for _, action := range actions { + action(client) + } + + client.appStart() } // SwapClient swaps the global client with the given client and Flush the old (*client). @@ -129,6 +140,10 @@ func ProductStarted(product types.Namespace) { if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStarted(product) + } else { + actionQueue.Enqueue(func(client Client) { + client.ProductStarted(product) + }) } } @@ -140,6 +155,10 @@ func ProductStopped(product types.Namespace) { if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStopped(product) + } else { + actionQueue.Enqueue(func(client Client) { + client.ProductStopped(product) + }) } } @@ -151,6 +170,10 @@ func ProductStartError(product types.Namespace, err error) { if client := globalClient.Load(); client != nil && *client != nil { (*client).ProductStartError(product, err) + } else { + actionQueue.Enqueue(func(client Client) { + client.ProductStartError(product, err) + }) } } @@ -163,6 +186,10 @@ func AddAppConfig(key string, value any, origin types.Origin) { if client := globalClient.Load(); client != nil && *client != nil { (*client).AddAppConfig(key, value, origin) + } else { + actionQueue.Enqueue(func(client Client) { + client.AddAppConfig(key, value, origin) + }) } } @@ -175,5 +202,9 @@ func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { if client := globalClient.Load(); client != nil && *client != nil { (*client).AddBulkAppConfig(kvs, origin) + } else { + actionQueue.Enqueue(func(client Client) { + client.AddBulkAppConfig(kvs, origin) + }) } } From 70adebc08bb7a558f5d2e93c46cb3b673d118b8d Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 24 Jan 2025 16:50:48 +0100 Subject: [PATCH 18/61] fix app-extended-heartbeat payload way of being sent each 24 hours Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 2 +- internal/newtelemetry/client_config.go | 10 + internal/newtelemetry/client_test.go | 172 +++++++++++------- .../internal/mapper/app_started.go | 5 +- .../newtelemetry/internal/mapper/default.go | 52 +++--- 5 files 
changed, 147 insertions(+), 94 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index f9feee657f..f59039f855 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -53,7 +53,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client tracerConfig: tracerConfig, writer: writer, clientConfig: config, - flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval), + flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index bf74b40e18..b9ff80ca37 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -47,6 +47,9 @@ type ClientConfig struct { // The maximum value is 60s. HeartbeatInterval time.Duration + // ExtendedHeartbeatInterval is the interval at which to send an extended heartbeat payload, defaults to 24h. + ExtendedHeartbeatInterval time.Duration + // FlushIntervalRange is the interval at which the client flushes the data. // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. @@ -75,6 +78,9 @@ const ( // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. defaultHeartbeatInterval = 60 // seconds + // defaultExtendedHeartbeatInterval is the default interval at which the agent sends an extended heartbeat. + defaultExtendedHeartbeatInterval = 24 * time.Hour + // defaultMinFlushInterval is the default interval at which the client flushes the data. 
defaultMinFlushInterval = 15.0 * time.Second @@ -158,6 +164,10 @@ func defaultConfig(config ClientConfig) ClientConfig { config.EarlyFlushPayloadSize = defaultEarlyFlushPayloadSize } + if config.ExtendedHeartbeatInterval == 0 { + config.ExtendedHeartbeatInterval = defaultExtendedHeartbeatInterval + } + return config } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 7335d07803..9885cd89d5 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -108,34 +108,40 @@ func TestClientFlush(t *testing.T) { { name: "extended-heartbeat-config", clientConfig: ClientConfig{ - HeartbeatInterval: time.Nanosecond, + HeartbeatInterval: time.Nanosecond, + ExtendedHeartbeatInterval: time.Nanosecond, }, when: func(c *client) { c.AddAppConfig("key", "value", types.OriginDefault) }, expect: func(t *testing.T, payload transport.Payload) { - require.IsType(t, transport.AppExtendedHeartbeat{}, payload) - heartbeat := payload.(transport.AppExtendedHeartbeat) - assert.Len(t, heartbeat.Configuration, 1) - assert.Equal(t, heartbeat.Configuration[0].Name, "key") - assert.Equal(t, heartbeat.Configuration[0].Value, "value") - assert.Equal(t, heartbeat.Configuration[0].Origin, types.OriginDefault) + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + require.Len(t, batch.Payload, 2) + assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch.Payload[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch.Payload[1].RequestType) + + assert.Len(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Configuration, 0) }, }, { name: "extended-heartbeat-integrations", clientConfig: ClientConfig{ - HeartbeatInterval: time.Nanosecond, + HeartbeatInterval: time.Nanosecond, + ExtendedHeartbeatInterval: time.Nanosecond, }, when: func(c *client) { c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, expect: func(t *testing.T, payload transport.Payload) { - require.IsType(t, transport.AppExtendedHeartbeat{}, payload) - heartbeat := payload.(transport.AppExtendedHeartbeat) - assert.Len(t, heartbeat.Integrations, 1) - assert.Equal(t, heartbeat.Integrations[0].Name, "test-integration") - assert.Equal(t, heartbeat.Integrations[0].Version, "1.0.0") + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + require.Len(t, batch.Payload, 2) + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, batch.Payload[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch.Payload[1].RequestType) + assert.Len(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations, 1) + assert.Equal(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Name, "test-integration") + assert.Equal(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Version, "1.0.0") }, }, { @@ -241,6 +247,7 @@ func TestClientFlush(t *testing.T) { expect: func(t *testing.T, payload transport.Payload) { require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) + assert.Len(t, batch.Payload, 2) for _, payload := range batch.Payload { switch p := payload.Payload.(type) { case transport.AppProductChange: @@ -259,6 +266,39 @@ func TestClientFlush(t *testing.T) { } }, }, + { + name: "product+integration+heartbeat", + clientConfig: ClientConfig{ + HeartbeatInterval: time.Nanosecond, + }, + when: func(c *client) 
{ + c.ProductStarted("test-product") + c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.MessageBatch{}, payload) + batch := payload.(transport.MessageBatch) + assert.Len(t, batch.Payload, 3) + for _, payload := range batch.Payload { + switch p := payload.Payload.(type) { + case transport.AppProductChange: + assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) + assert.Len(t, p.Products, 1) + assert.True(t, p.Products[types.Namespace("test-product")].Enabled) + case transport.AppIntegrationChange: + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + assert.True(t, p.Integrations[0].Enabled) + case transport.AppHeartbeat: + assert.Equal(t, transport.RequestTypeAppHeartbeat, payload.RequestType) + default: + t.Fatalf("unexpected payload type: %T", p) + } + } + }, + }, { name: "app-started", when: func(c *client) { @@ -305,24 +345,17 @@ func TestClientFlush(t *testing.T) { c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, expect: func(t *testing.T, payload transport.Payload) { - require.IsType(t, transport.MessageBatch{}, payload) - batch := payload.(transport.MessageBatch) - - // Check AppStarted is the first payload in MessageBatch - assert.IsType(t, transport.AppStarted{}, batch.Payload[0].Payload) - - for _, payload := range batch.Payload { - switch p := payload.Payload.(type) { - case transport.AppStarted: - assert.Equal(t, transport.RequestTypeAppStarted, payload.RequestType) - case transport.AppIntegrationChange: - assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) - assert.Len(t, p.Integrations, 1) - assert.Equal(t, p.Integrations[0].Name, "test-integration") - assert.Equal(t, p.Integrations[0].Version, "1.0.0") - default: - t.Fatalf("unexpected payload type: %T", p) - } + switch p := payload.(type) { + case transport.AppStarted: + assert.Equal(t, globalconfig.InstrumentationInstallID(), p.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), p.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), p.InstallSignature.InstallTime) + case transport.AppIntegrationChange: + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") + default: + t.Fatalf("unexpected payload type: %T", p) } }, }, @@ -335,21 +368,14 @@ func TestClientFlush(t *testing.T) { c.appStart() }, expect: func(t *testing.T, payload transport.Payload) { - require.IsType(t, transport.MessageBatch{}, payload) - batch := payload.(transport.MessageBatch) - - // Check AppStarted is the first payload in MessageBatch - assert.IsType(t, transport.AppStarted{}, batch.Payload[0].Payload) - - for _, payload := range batch.Payload { - switch p := payload.Payload.(type) { - case transport.AppStarted: - assert.Equal(t, transport.RequestTypeAppStarted, payload.RequestType) - case transport.AppHeartbeat: - assert.Equal(t, transport.RequestTypeAppHeartbeat, payload.RequestType) - default: - t.Fatalf("unexpected payload type: %T", p) - } + switch p := payload.(type) { + case transport.AppStarted: + assert.Equal(t, globalconfig.InstrumentationInstallID(), p.InstallSignature.InstallID) + 
assert.Equal(t, globalconfig.InstrumentationInstallType(), p.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), p.InstallSignature.InstallTime) + case transport.AppHeartbeat: + default: + t.Fatalf("unexpected payload type: %T", p) } }, }, @@ -404,13 +430,6 @@ func TestClientEnd2End(t *testing.T) { Env: "test-env", Version: "1.0.0", } - clientConfig := ClientConfig{ - AgentURL: "http://localhost:8126", - HTTPClient: &http.Client{ - Timeout: 5 * time.Second, - }, - Debug: true, - } parseRequest := func(t *testing.T, request *http.Request) transport.Body { assert.Equal(t, "v2", request.Header.Get("DD-Telemetry-API-Version")) @@ -428,6 +447,7 @@ func TestClientEnd2End(t *testing.T) { var body transport.Body require.NoError(t, json.NewDecoder(request.Body).Decode(&body)) + assert.Equal(t, string(body.RequestType), request.Header.Get("DD-Telemetry-Request-Type")) assert.Equal(t, "test-service", body.Application.ServiceName) assert.Equal(t, "test-env", body.Application.Env) assert.Equal(t, "1.0.0", body.Application.ServiceVersion) @@ -445,7 +465,7 @@ func TestClientEnd2End(t *testing.T) { assert.Equal(t, true, body.Debug) assert.Equal(t, "v2", body.APIVersion) assert.NotZero(t, body.TracerTime) - assert.EqualValues(t, 1, body.SeqID) + assert.LessOrEqual(t, int64(1), body.SeqID) assert.Equal(t, globalconfig.RuntimeID(), body.RuntimeID) return body @@ -462,7 +482,7 @@ func TestClientEnd2End(t *testing.T) { c.appStart() }, expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - assert.EqualValues(t, transport.RequestTypeAppStarted, request.Header.Get("DD-Telemetry-Request-Type")) + assert.Equal(t, string(transport.RequestTypeAppStarted), request.Header.Get("DD-Telemetry-Request-Type")) body := parseRequest(t, request) assert.Equal(t, transport.RequestTypeAppStarted, body.RequestType) return &http.Response{ @@ -476,7 +496,7 @@ func TestClientEnd2End(t *testing.T) { c.appStop() }, expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - assert.EqualValues(t, transport.RequestTypeAppClosing, request.Header.Get("DD-Telemetry-Request-Type")) + assert.Equal(t, string(transport.RequestTypeAppClosing), request.Header.Get("DD-Telemetry-Request-Type")) body := parseRequest(t, request) assert.Equal(t, transport.RequestTypeAppClosing, body.RequestType) return &http.Response{ @@ -491,14 +511,19 @@ func TestClientEnd2End(t *testing.T) { c.appStop() }, expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - assert.EqualValues(t, transport.RequestTypeMessageBatch, request.Header.Get("DD-Telemetry-Request-Type")) body := parseRequest(t, request) - assert.Equal(t, transport.RequestTypeMessageBatch, body.RequestType) - payload := body.Payload.(transport.MessageBatch) - assert.Len(t, payload.Payload, 2) - assert.Equal(t, transport.RequestTypeAppStarted, payload.Payload[0].RequestType) - assert.Equal(t, transport.RequestTypeAppClosing, payload.Payload[1].RequestType) + switch request.Header.Get("DD-Telemetry-Request-Type") { + case string(transport.RequestTypeAppStarted): + payload := body.Payload.(*transport.AppStarted) + assert.Equal(t, globalconfig.InstrumentationInstallID(), payload.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), payload.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), payload.InstallSignature.InstallTime) + case string(transport.RequestTypeAppClosing): + + default: + t.Fatalf("unexpected request type: %s", 
request.Header.Get("DD-Telemetry-Request-Type")) + } return &http.Response{ StatusCode: http.StatusOK, @@ -508,15 +533,22 @@ func TestClientEnd2End(t *testing.T) { } { t.Run(test.name, func(t *testing.T) { t.Parallel() - clientConfig.HTTPClient.Transport = &testRoundTripper{ - roundTrip: func(req *http.Request) (*http.Response, error) { - if test.expect != nil { - return test.expect(t, req) - } - return &http.Response{ - StatusCode: http.StatusOK, - }, nil + clientConfig := ClientConfig{ + AgentURL: "http://localhost:8126", + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + roundTrip: func(req *http.Request) (*http.Response, error) { + if test.expect != nil { + return test.expect(t, req) + } + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, }, + Debug: true, } c, err := newClient(tracerConfig, clientConfig) diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go index 169bfb3b6c..395a401196 100644 --- a/internal/newtelemetry/internal/mapper/app_started.go +++ b/internal/newtelemetry/internal/mapper/app_started.go @@ -42,6 +42,7 @@ func (t *appStartedReducer) Transform(payloads []transport.Payload) ([]transport } } - // The app-started event should be the first event in the payload - return t.wrapper.Transform(append([]transport.Payload{appStarted}, payloadLefts...)) + // The app-started event should be the first event in the payload and not in an message-batch + payloads, mapper := t.wrapper.Transform(payloadLefts) + return append([]transport.Payload{appStarted}, payloads...), mapper } diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go index f069880369..5007469f70 100644 --- a/internal/newtelemetry/internal/mapper/default.go +++ b/internal/newtelemetry/internal/mapper/default.go @@ -15,15 +15,17 @@ import ( // NewDefaultMapper returns a Mapper that transforms payloads into a MessageBatch and adds a heartbeat message. // The heartbeat message is added every heartbeatInterval. 
-func NewDefaultMapper(heartbeatInterval time.Duration) Mapper { +func NewDefaultMapper(heartbeatInterval, extendedHeartBeatInterval time.Duration) Mapper { mapper := &defaultMapper{ heartbeatEnricher: heartbeatEnricher{ - rateLimiter: rate.NewLimiter(rate.Every(heartbeatInterval), 1), + RL: rate.NewLimiter(rate.Every(heartbeatInterval), 1), + extendedHeartbeatRL: rate.NewLimiter(rate.Every(extendedHeartBeatInterval), 1), }, } // The rate limiter is initialized with a token, but we want the first heartbeat to be sent in one minute, so we consume the token - mapper.heartbeatEnricher.rateLimiter.Allow() + mapper.heartbeatEnricher.RL.Allow() + mapper.heartbeatEnricher.extendedHeartbeatRL.Allow() return mapper } @@ -57,33 +59,41 @@ func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transpo } type heartbeatEnricher struct { - rateLimiter *rate.Limiter + RL *rate.Limiter + extendedHeartbeatRL *rate.Limiter + + extendedHeartbeat transport.AppExtendedHeartbeat + heartBeat transport.AppHeartbeat } func (t *heartbeatEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { - if !t.rateLimiter.Allow() { - return payloads, t - } - - extendedHeartbeat := transport.AppExtendedHeartbeat{} - payloadLefts := make([]transport.Payload, 0, len(payloads)) + // Built the extended heartbeat using other payloads + // Composition described here: + // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-extended-heartbeat for _, payload := range payloads { - switch payload.(type) { + switch p := payload.(type) { + case transport.AppStarted: + // Should be sent only once anyway + t.extendedHeartbeat.Configuration = p.Configuration case transport.AppDependenciesLoaded: - extendedHeartbeat.Dependencies = payload.(transport.AppDependenciesLoaded).Dependencies + if t.extendedHeartbeat.Dependencies == nil { + t.extendedHeartbeat.Dependencies = p.Dependencies + } case transport.AppIntegrationChange: - extendedHeartbeat.Integrations = payload.(transport.AppIntegrationChange).Integrations - case transport.AppClientConfigurationChange: - extendedHeartbeat.Configuration = payload.(transport.AppClientConfigurationChange).Configuration - default: - payloadLefts = append(payloadLefts, payload) + // The number of integrations should be small enough so we can just append to the list + t.extendedHeartbeat.Integrations = append(t.extendedHeartbeat.Integrations, p.Integrations...) 
} } - if len(payloadLefts) == len(payloads) { - // No Payloads were consumed by the extended heartbeat, we can add a regular heartbeat - return append(payloads, transport.AppHeartbeat{}), t + if !t.RL.Allow() { + // We don't send anything + return payloads, t + } + + if t.extendedHeartbeatRL.Allow() { + // We have an extended heartbeat to send + return append(payloads, t.extendedHeartbeat), t } - return append(payloadLefts, extendedHeartbeat), t + return append(payloads, t.heartBeat), t } From f52047b1f4a133b664bd34a73f50e00f00dfac05 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 24 Jan 2025 16:54:36 +0100 Subject: [PATCH 19/61] support for app-dependencies-loaded payload Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 14 ++-- internal/newtelemetry/client_config.go | 13 ++-- internal/newtelemetry/client_test.go | 73 ++++++++++++++++++++- internal/newtelemetry/dependencies.go | 91 ++++++++++++++++++++++++++ 4 files changed, 179 insertions(+), 12 deletions(-) create mode 100644 internal/newtelemetry/dependencies.go diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index f59039f855..a366faeb38 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -30,15 +30,15 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error return nil, errors.New("version must not be empty") } - return newClient(internal.TracerConfig{Service: service, Env: env, Version: version}, config) -} - -func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client, error) { config = defaultConfig(config) if err := config.validateConfig(); err != nil { return nil, err } + return newClient(internal.TracerConfig{Service: service, Env: env, Version: version}, config) +} + +func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client, error) { writerConfig, err := newWriterConfig(config, tracerConfig) if err != nil { return nil, err @@ -57,12 +57,17 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), + + dependencies: dependencies{ + DependencyLoader: config.DependencyLoader, + }, } client.dataSources = append(client.dataSources, &client.integrations, &client.products, &client.configuration, + &client.dependencies, ) client.flushTicker = internal.NewTicker(func() { @@ -81,6 +86,7 @@ type client struct { integrations integrations products products configuration configuration + dependencies dependencies dataSources []interface { Payload() transport.Payload } diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index b9ff80ca37..1b145abcb4 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -10,6 +10,7 @@ import ( "net/http" "net/url" "os" + "runtime/debug" "time" globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" @@ -17,11 +18,11 @@ import ( ) type ClientConfig struct { - // DependencyCollectionEnabled determines whether dependency data is sent via telemetry. - // If false, libraries should not send the app-dependencies-loaded event. 
- // We default this to true since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product + // DependencyLoader determines how dependency data is sent via telemetry. + // If nil, the library should not send the app-dependencies-loaded event. + // The default value is [debug.ReadBuildInfo] since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product // This can be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED - DependencyCollectionEnabled bool + DependencyLoader func() (*debug.BuildInfo, bool) // MetricsEnabled etermines whether metrics are sent via telemetry. // If false, libraries should not send the generate-metrics or distributions events. @@ -148,8 +149,8 @@ func defaultConfig(config ClientConfig) ClientConfig { config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) } - if !config.DependencyCollectionEnabled { - config.DependencyCollectionEnabled = globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) + if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { + config.DependencyLoader = debug.ReadBuildInfo } if !config.MetricsEnabled { diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 9885cd89d5..5f8b96ab5e 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -8,8 +8,10 @@ package newtelemetry import ( "encoding/json" "errors" + "fmt" "net/http" "runtime" + "runtime/debug" "testing" "time" @@ -388,11 +390,75 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.AppClosing{}, payload) }, }, + { + name: "app-dependencies-loaded", + clientConfig: ClientConfig{ + DependencyLoader: func() (*debug.BuildInfo, bool) { + return &debug.BuildInfo{ + Deps: []*debug.Module{ + {Path: "test", Version: "v1.0.0"}, + {Path: "test2", Version: "v2.0.0"}, + {Path: "test3", Version: "3.0.0"}, + }, + }, true + }, + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppDependenciesLoaded{}, payload) + deps := payload.(transport.AppDependenciesLoaded) + + assert.Len(t, deps.Dependencies, 3) + assert.Equal(t, deps.Dependencies[0].Name, "test") + assert.Equal(t, deps.Dependencies[0].Version, "1.0.0") + assert.Equal(t, deps.Dependencies[1].Name, "test2") + assert.Equal(t, deps.Dependencies[1].Version, "2.0.0") + assert.Equal(t, deps.Dependencies[2].Name, "test3") + assert.Equal(t, deps.Dependencies[2].Version, "3.0.0") + }, + }, + { + name: "app-many-dependencies-loaded", + clientConfig: ClientConfig{ + DependencyLoader: func() (*debug.BuildInfo, bool) { + modules := make([]*debug.Module, 2001) + for i := range modules { + modules[i] = &debug.Module{ + Path: fmt.Sprintf("test-%d", i), + Version: fmt.Sprintf("v%d.0.0", i), + } + } + return &debug.BuildInfo{ + Deps: modules, + }, true + }, + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.AppDependenciesLoaded{}, payload) + deps := payload.(transport.AppDependenciesLoaded) + + if len(deps.Dependencies) != 2000 && len(deps.Dependencies) != 1 { + t.Fatalf("expected 2000 and 1 dependencies, got %d", len(deps.Dependencies)) + } + + if len(deps.Dependencies) == 1 { + assert.Equal(t, deps.Dependencies[0].Name, "test-0") + assert.Equal(t, deps.Dependencies[0].Version, "0.0.0") + return + } + + for i := range deps.Dependencies { + assert.Equal(t, 
deps.Dependencies[i].Name, fmt.Sprintf("test-%d", i)) + assert.Equal(t, deps.Dependencies[i].Version, fmt.Sprintf("%d.0.0", i)) + } + }, + }, } { t.Run(test.name, func(t *testing.T) { t.Parallel() - test.clientConfig.AgentURL = "http://localhost:8126" - c, err := newClient(tracerConfig, test.clientConfig) + config := defaultConfig(test.clientConfig) + config.AgentURL = "http://localhost:8126" + config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader + c, err := newClient(tracerConfig, config) require.NoError(t, err) defer c.Close() @@ -551,6 +617,9 @@ func TestClientEnd2End(t *testing.T) { Debug: true, } + clientConfig = defaultConfig(clientConfig) + clientConfig.DependencyLoader = nil + c, err := newClient(tracerConfig, clientConfig) require.NoError(t, err) defer c.Close() diff --git a/internal/newtelemetry/dependencies.go b/internal/newtelemetry/dependencies.go new file mode 100644 index 0000000000..d830816ab6 --- /dev/null +++ b/internal/newtelemetry/dependencies.go @@ -0,0 +1,91 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "runtime/debug" + "strings" + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type dependencies struct { + DependencyLoader func() (*debug.BuildInfo, bool) + + once sync.Once + + mu sync.Mutex + payloads []transport.Payload +} + +func (d *dependencies) Payload() transport.Payload { + if d.DependencyLoader == nil { + return nil + } + + d.mu.Lock() + defer d.mu.Unlock() + + d.once.Do(func() { + deps := d.loadDeps() + // Requirement described here: + // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-dependencies-loaded + if len(deps) > 2000 { + log.Debug("telemetry: too many (%d) dependencies to send, sending over multiple payloads", len(deps)) + } + + for i := 0; i < len(deps); i += 2000 { + end := min(i+2000, len(deps)) + + d.payloads = append(d.payloads, transport.AppDependenciesLoaded{ + Dependencies: deps[i:end], + }) + } + }) + + if len(d.payloads) == 0 { + return nil + } + + payloadZero := d.payloads[0] + if len(d.payloads) == 1 { + d.payloads = nil + } + + if len(d.payloads) > 1 { + d.payloads = d.payloads[1:] + } + + return payloadZero +} + +func (d *dependencies) loadDeps() []transport.Dependency { + deps, ok := d.DependencyLoader() + if !ok { + log.Debug("telemetry: could not read build info, no dependencies will be reported") + return nil + } + + transportDeps := make([]transport.Dependency, 0, len(deps.Deps)) + for _, dep := range deps.Deps { + if dep == nil { + continue + } + + if dep.Replace != nil && dep.Replace.Version != "" { + dep = dep.Replace + } + + transportDeps = append(transportDeps, transport.Dependency{ + Name: dep.Path, + Version: strings.TrimPrefix(dep.Version, "v"), + }) + } + + return transportDeps +} From 5f5b7ba73ce7695fa95af10a440b590dfb79d236 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 27 Jan 2025 15:58:45 +0100 Subject: [PATCH 20/61] setup logging and the recorder for metrics. 
Change the api to remove the TelemetryLogger and replace it with a simple Log function Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 25 +-- internal/newtelemetry/client.go | 18 +- internal/newtelemetry/globalclient.go | 173 +++++++++--------- internal/newtelemetry/internal/recorder.go | 39 ++++ .../newtelemetry/internal/transport/logs.go | 4 +- internal/newtelemetry/logger.go | 118 ++++++++++++ 6 files changed, 264 insertions(+), 113 deletions(-) create mode 100644 internal/newtelemetry/internal/recorder.go create mode 100644 internal/newtelemetry/logger.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index e6631c3330..02fc7caf58 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -24,26 +24,6 @@ type MetricHandle interface { flush() } -// TelemetryLogger is the interface implementing the telemetry logs. Supports log deduplication. All methods are Thread-safe -// This is an interface for easier testing but all functions will be mirrored at the package level to call -// the global client. -type TelemetryLogger interface { - // WithTags creates a new Logger which will send a comma-separated list of tags with the next logs - WithTags(tags string) TelemetryLogger - - // WithStackTrace creates a new Logger which will send a stack trace generated for each next log. - WithStackTrace() TelemetryLogger - - // Error sends a telemetry log at the ERROR level - Error(text string) - - // Warn sends a telemetry log at the WARN level - Warn(text string) - - // Debug sends a telemetry log at the DEBUG level - Debug(text string) -} - // Integration is an integration that is configured to be traced. type Integration struct { // Name is an arbitrary string that must stay constant for the integration. @@ -73,8 +53,9 @@ type Client interface { // Distribution creates a new metric handle for the given parameters that can be used to submit values. Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle - // Logger returns an implementation of the Logger interface which sends telemetry logs. - Logger() TelemetryLogger + // Log sends a telemetry log at the desired level with the given text and options. + // Options include sending key-value pairs as tags, and a stack trace frozen from inside the Log function. + Log(level LogLevel, text string, options ...LogOption) // ProductStarted declares a product to have started at the customer’s request ProductStarted(product types.Namespace) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index a366faeb38..b7a590a4b4 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -70,6 +70,10 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client &client.dependencies, ) + if config.LogsEnabled { + client.dataSources = append(client.dataSources, &client.logger) + } + client.flushTicker = internal.NewTicker(func() { client.Flush() }, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) @@ -87,6 +91,7 @@ type client struct { products products configuration configuration dependencies dependencies + logger logger dataSources []interface { Payload() transport.Payload } @@ -101,6 +106,14 @@ type client struct { payloadQueue *internal.RingQueue[transport.Payload] } +func (c *client) Log(level LogLevel, text string, options ...LogOption) { + if !c.clientConfig.LogsEnabled { + return + } + + c.logger.Add(level, text, options...) 
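	// Note: logger.Add (defined later in this series) deduplicates on the
	// (level, message, tags, stack trace) tuple, so calling for example
	// c.Log(LogError, "oops") three times before a flush yields a single
	// LogMessage with Count == 3 rather than three entries.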
+} + func (c *client) MarkIntegrationAsLoaded(integration Integration) { c.integrations.Add(integration) } @@ -125,11 +138,6 @@ func (c *client) Distribution(_ types.Namespace, _ string, _ map[string]string) panic("implement me") } -func (c *client) Logger() TelemetryLogger { - //TODO implement me - panic("implement me") -} - func (c *client) ProductStarted(product types.Namespace) { c.products.Add(product, true, nil) } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 138a8df227..abd80e60f5 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -16,8 +16,8 @@ import ( var ( globalClient atomic.Pointer[Client] - // actionQueue contains all actions done on the global client done before StartApp() with an actual client object is called - actionQueue = internal.NewRingQueue[func(Client)](16, 512) + // globalClientRecorder contains all actions done on the global client done before StartApp() with an actual client object is called + globalClientRecorder = internal.NewRecorder[Client]() ) // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). @@ -28,11 +28,7 @@ func StartApp(client Client) { SwapClient(client) - actions := actionQueue.GetBuffer() - defer actionQueue.ReleaseBuffer(actions) - for _, action := range actions { - action(client) - } + globalClientRecorder.Replay(client) client.appStart() } @@ -73,11 +69,11 @@ func Count(namespace types.Namespace, name string, tags map[string]string) Metri return nil } - if client := globalClient.Load(); client != nil && *client != nil { - return (*client).Count(namespace, name, tags) + client := globalClient.Load() + if client == nil || *client == nil { + return nil } - - return nil + return (*client).Count(namespace, name, tags) } // Rate creates a new metric handle for the given parameters that can be used to submit values. @@ -86,11 +82,11 @@ func Rate(namespace types.Namespace, name string, tags map[string]string) Metric return nil } - if client := globalClient.Load(); client != nil && *client != nil { - return (*client).Rate(namespace, name, tags) + client := globalClient.Load() + if client == nil || *client == nil { + return nil } - - return nil + return (*client).Rate(namespace, name, tags) } // Gauge creates a new metric handle for the given parameters that can be used to submit values. @@ -99,11 +95,11 @@ func Gauge(namespace types.Namespace, name string, tags map[string]string) Metri return nil } - if client := globalClient.Load(); client != nil && *client != nil { - return (*client).Gauge(namespace, name, tags) + client := globalClient.Load() + if client == nil || *client == nil { + return nil } - - return nil + return (*client).Gauge(namespace, name, tags) } // Distribution creates a new metric handle for the given parameters that can be used to submit values. @@ -112,99 +108,108 @@ func Distribution(namespace types.Namespace, name string, tags map[string]string return nil } - if client := globalClient.Load(); client != nil && *client != nil { - return (*client).Distribution(namespace, name, tags) + client := globalClient.Load() + if client == nil || *client == nil { + return nil } + return (*client).Distribution(namespace, name, tags) +} - return nil +func Log(level LogLevel, text string, options ...LogOption) { + globalClientCall(func(client Client) { + client.Log(level, text, options...) 
+ }) } -// Logger returns an implementation of the TelemetryLogger interface which sends telemetry logs. -func Logger() TelemetryLogger { - if Disabled() { - return nil - } +// ProductStarted declares a product to have started at the customer’s request. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func ProductStarted(product types.Namespace) { + globalClientCall(func(client Client) { + client.ProductStarted(product) + }) +} - if client := globalClient.Load(); client != nil && *client != nil { - return (*client).Logger() - } +// ProductStopped declares a product to have being stopped by the customer. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func ProductStopped(product types.Namespace) { + globalClientCall(func(client Client) { + client.ProductStopped(product) + }) +} - return nil +// ProductStartError declares that a product could not start because of the following error. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func ProductStartError(product types.Namespace, err error) { + globalClientCall(func(client Client) { + client.ProductStartError(product, err) + }) } -// ProductStarted declares a product to have started at the customer’s request -func ProductStarted(product types.Namespace) { - if Disabled() { - return - } +// AddAppConfig adds a key value pair to the app configuration and send the change to telemetry +// value has to be json serializable and the origin is the source of the change. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func AddAppConfig(key string, value any, origin types.Origin) { + globalClientCall(func(client Client) { + client.AddAppConfig(key, value, origin) + }) +} - if client := globalClient.Load(); client != nil && *client != nil { - (*client).ProductStarted(product) - } else { - actionQueue.Enqueue(func(client Client) { - client.ProductStarted(product) - }) - } +// AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. +// Same as AddAppConfig but for multiple values. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { + globalClientCall(func(client Client) { + client.AddBulkAppConfig(kvs, origin) + }) } -// ProductStopped declares a product to have being stopped by the customer -func ProductStopped(product types.Namespace) { +// globalClientCall takes a function that takes a Client and calls it with the global client if it exists. +// otherwise, it records the action for when the client is started. 
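// For illustration, each package-level helper reduces to the same shape as
// ProductStarted above, e.g.:
//
//	func ProductStarted(product types.Namespace) {
//		globalClientCall(func(client Client) { client.ProductStarted(product) })
//	}
//
// so that calls made before StartApp are recorded by globalClientRecorder and
// replayed once a real client has been swapped in.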
+func globalClientCall(fun func(client Client)) { if Disabled() { return } - if client := globalClient.Load(); client != nil && *client != nil { - (*client).ProductStopped(product) - } else { - actionQueue.Enqueue(func(client Client) { - client.ProductStopped(product) + client := globalClient.Load() + if client == nil || *client == nil { + globalClientRecorder.Record(func(client Client) { + fun(client) }) - } -} - -// ProductStartError declares that a product could not start because of the following error -func ProductStartError(product types.Namespace, err error) { - if Disabled() { return } - if client := globalClient.Load(); client != nil && *client != nil { - (*client).ProductStartError(product, err) - } else { - actionQueue.Enqueue(func(client Client) { - client.ProductStartError(product, err) - }) - } + fun(*client) } -// AddAppConfig adds a key value pair to the app configuration and send the change to telemetry -// value has to be json serializable and the origin is the source of the change. -func AddAppConfig(key string, value any, origin types.Origin) { - if Disabled() { - return - } +type metricsHotPointer struct { + ptr atomic.Pointer[MetricHandle] + recorder internal.Recorder[MetricHandle] +} - if client := globalClient.Load(); client != nil && *client != nil { - (*client).AddAppConfig(key, value, origin) - } else { - actionQueue.Enqueue(func(client Client) { - client.AddAppConfig(key, value, origin) +func (t *metricsHotPointer) Submit(value float64) { + inner := t.ptr.Load() + if inner == nil || *inner == nil { + t.recorder.Record(func(handle MetricHandle) { + handle.Submit(value) }) } + + (*inner).Submit(value) } -// AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. -// Same as AddAppConfig but for multiple values. -func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { - if Disabled() { +func (t *metricsHotPointer) flush() { + inner := t.ptr.Load() + if inner == nil || *inner == nil { return } - if client := globalClient.Load(); client != nil && *client != nil { - (*client).AddBulkAppConfig(kvs, origin) - } else { - actionQueue.Enqueue(func(client Client) { - client.AddBulkAppConfig(kvs, origin) - }) + (*inner).flush() +} + +func (t *metricsHotPointer) swap(handle MetricHandle) { + if t.ptr.Swap(&handle) == nil { + t.recorder.Replay(handle) } } + +var _ MetricHandle = (*metricsHotPointer)(nil) diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go new file mode 100644 index 0000000000..985708d9c5 --- /dev/null +++ b/internal/newtelemetry/internal/recorder.go @@ -0,0 +1,39 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" +) + +// Recorder is a generic thread-safe type that records functions that could have taken place before object T was created. +// Once object T is created, the Recorder can replay all the recorded functions with object T as an argument. +type Recorder[T any] struct { + queue *RingQueue[func(T)] +} + +// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions. 
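// A minimal usage sketch (illustrative only; *bytes.Buffer stands in for the real
// target type, which in this series is the telemetry Client):
//
//	rec := NewRecorder[*bytes.Buffer]()
//	rec.Record(func(b *bytes.Buffer) { b.WriteString("hello") }) // queued, nothing runs yet
//	buf := &bytes.Buffer{}
//	rec.Replay(buf) // runs every recorded function against buf, in FIFO order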
+func NewRecorder[T any]() Recorder[T] { + return Recorder[T]{ + queue: NewRingQueue[func(T)](16, 512), + } +} + +func (r Recorder[T]) Record(f func(T)) { + if !r.queue.Enqueue(f) { + log.Debug("telemetry: recorder queue is full, dropping record") + } +} + +func (r Recorder[T]) Replay(t T) { + for { + f := r.queue.Dequeue() + if f == nil { + break + } + f(t) + } +} diff --git a/internal/newtelemetry/internal/transport/logs.go b/internal/newtelemetry/internal/transport/logs.go index e945b9f155..c0cebe2849 100644 --- a/internal/newtelemetry/internal/transport/logs.go +++ b/internal/newtelemetry/internal/transport/logs.go @@ -24,8 +24,8 @@ const ( type LogMessage struct { Message string `json:"message"` Level LogLevel `json:"level"` - Count int `json:"count,omitempty"` + Count uint32 `json:"count,omitempty"` Tags string `json:"tags,omitempty"` // comma separated list of tags, e.g. "tag1:1,tag2:toto" StackTrace string `json:"stack_trace,omitempty"` - TracerTime int `json:"tracer_time,omitempty"` // Unix timestamp in seconds + TracerTime int64 `json:"tracer_time,omitempty"` // Unix timestamp in seconds } diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go new file mode 100644 index 0000000000..3a11a7fbda --- /dev/null +++ b/internal/newtelemetry/logger.go @@ -0,0 +1,118 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "runtime" + "strings" + "sync" + "sync/atomic" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type LogOption func(key *loggerKey) + +func WithTags(tags map[string]string) LogOption { + compiledTags := "" + for k, v := range tags { + compiledTags += k + ":" + v + "," + } + compiledTags = strings.TrimSuffix(compiledTags, ",") + return func(key *loggerKey) { + key.tags = compiledTags + } +} + +func WithStacktrace() LogOption { + buf := make([]byte, 1<<12) + runtime.Stack(buf, false) + return func(key *loggerKey) { + key.stacktrace = string(buf) + } +} + +type LogLevel = transport.LogLevel + +const ( + LogDebug = transport.LogLevelDebug + LogWarn = transport.LogLevelWarn + LogError = transport.LogLevelError +) + +type loggerKey struct { + tags string + message string + level LogLevel + stacktrace string +} + +type loggerValue struct { + count atomic.Uint32 + time int64 // Unix timestamp +} + +type logger struct { + store atomic.Pointer[sync.Map] +} + +func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { + store := logger.store.Load() + if store == nil { + store = new(sync.Map) + for logger.store.CompareAndSwap(nil, store) { + continue + } + } + + key := loggerKey{ + message: text, + level: level, + } + + for _, opt := range opts { + opt(&key) + } + + val, ok := store.Load(key) + if ok { + val.(*loggerValue).count.Add(1) + return + } + + newVal := &loggerValue{ + time: time.Now().Unix(), + } + + newVal.count.Add(1) + store.Store(key, newVal) +} + +func (logger *logger) Payload() transport.Payload { + store := logger.store.Swap(nil) + if store == nil { + return nil + } + + var logs []transport.LogMessage + store.Range(func(key, value any) bool { + k := key.(loggerKey) + v := value.(*loggerValue) + logs = append(logs, transport.LogMessage{ + Message: k.message, + Level: k.level, + Tags: k.tags, + Count: v.count.Load(), + StackTrace: k.stacktrace, + TracerTime: 
v.time, + }) + return true + }) + + store.Clear() + return transport.Logs{Logs: logs} +} From 712971badd01c87331749454e6dfbad715600daf Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 27 Jan 2025 17:40:22 +0100 Subject: [PATCH 21/61] tested logs API and added benchmarks for it Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_config.go | 4 + internal/newtelemetry/client_test.go | 275 +++++++++++++++++++++++++ internal/newtelemetry/logger.go | 43 ++-- 3 files changed, 308 insertions(+), 14 deletions(-) diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index 1b145abcb4..7da0a5c2ee 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -199,6 +199,10 @@ func newWriterConfig(config ClientConfig, tracerConfig internal.TracerConfig) (i endpoints = append(endpoints, request) } + if len(endpoints) == 0 { + return internal.WriterConfig{}, fmt.Errorf("telemetry: could not build any endpoint, please provide an AgentURL or an APIKey with an optional AgentlessURL") + } + return internal.WriterConfig{ TracerConfig: tracerConfig, Endpoints: endpoints, diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 5f8b96ab5e..6225a10dff 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -12,6 +12,10 @@ import ( "net/http" "runtime" "runtime/debug" + "slices" + "strconv" + "strings" + "sync" "testing" "time" @@ -452,6 +456,178 @@ func TestClientFlush(t *testing.T) { } }, }, + { + name: "single-log-debug", + when: func(c *client) { + c.Log(LogDebug, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelDebug) + assert.Equal(t, logs.Logs[0].Message, "test") + }, + }, + { + name: "single-log-warn", + when: func(c *client) { + c.Log(LogWarn, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelWarn) + assert.Equal(t, logs.Logs[0].Message, "test") + }, + }, + { + name: "single-log-error", + when: func(c *client) { + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + }, + }, + { + name: "multiple-logs-same-key", + when: func(c *client) { + c.Log(LogError, "test") + c.Log(LogError, "test") + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, logs.Logs[0].Count, uint32(3)) + }, + }, + { + name: "single-log-with-tag", + when: func(c *client) { + c.Log(LogError, "test", WithTags(map[string]string{"key": "value"})) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, 
transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, logs.Logs[0].Tags, "key:value") + }, + }, + { + name: "single-log-with-tags", + when: func(c *client) { + c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + tags := strings.Split(logs.Logs[0].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + }, + }, + { + name: "single-log-with-tags-and-without", + when: func(c *client) { + c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) + c.Log(LogError, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 2) + + slices.SortStableFunc(logs.Logs, func(i, j transport.LogMessage) int { + return strings.Compare(i.Tags, j.Tags) + }) + + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, logs.Logs[0].Count, uint32(1)) + assert.Empty(t, logs.Logs[0].Tags) + + assert.Equal(t, logs.Logs[1].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[1].Message, "test") + assert.Equal(t, logs.Logs[1].Count, uint32(1)) + tags := strings.Split(logs.Logs[1].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + }, + }, + { + name: "single-log-with-stacktrace", + when: func(c *client) { + c.Log(LogError, "test", WithStacktrace()) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") + }, + }, + { + name: "single-log-with-stacktrace-and-tags", + when: func(c *client) { + c.Log(LogError, "test", WithStacktrace(), WithTags(map[string]string{"key": "value", "key2": "value2"})) + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 1) + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") + tags := strings.Split(logs.Logs[0].Tags, ",") + assert.Contains(t, tags, "key:value") + assert.Contains(t, tags, "key2:value2") + + }, + }, + { + name: "multiple-logs-different-levels", + when: func(c *client) { + c.Log(LogError, "test") + c.Log(LogWarn, "test") + c.Log(LogDebug, "test") + }, + expect: func(t *testing.T, payload transport.Payload) { + require.IsType(t, transport.Logs{}, payload) + logs := payload.(transport.Logs) + require.Len(t, logs.Logs, 3) + + slices.SortStableFunc(logs.Logs, func(i, j transport.LogMessage) int { + return strings.Compare(string(i.Level), string(j.Level)) + }) + + assert.Equal(t, logs.Logs[0].Level, transport.LogLevelDebug) + assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, logs.Logs[0].Count, uint32(1)) + assert.Equal(t, 
logs.Logs[1].Level, transport.LogLevelError) + assert.Equal(t, logs.Logs[1].Message, "test") + assert.Equal(t, logs.Logs[1].Count, uint32(1)) + assert.Equal(t, logs.Logs[2].Level, transport.LogLevelWarn) + assert.Equal(t, logs.Logs[2].Message, "test") + assert.Equal(t, logs.Logs[2].Count, uint32(1)) + }, + }, } { t.Run(test.name, func(t *testing.T) { t.Parallel() @@ -629,3 +805,102 @@ func TestClientEnd2End(t *testing.T) { }) } } + +func BenchmarkLogs(b *testing.B) { + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + AgentURL: "http://localhost:8126", + } + + b.Run("simple", func(b *testing.B) { + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length with a variable message: "+strconv.Itoa(i%10)) + } + }) + + b.Run("with-tags", func(b *testing.B) { + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogWarn, "this is supposed to be a WARN log of representative length", WithTags(map[string]string{"key": strconv.Itoa(i % 10)})) + } + }) + + b.Run("with-stacktrace", func(b *testing.B) { + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Log(LogError, "this is supposed to be a ERROR log of representative length", WithStacktrace()) + } + }) +} + +func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { + nbSameLogs := 10 + nbDifferentLogs := 100 + nbGoroutines := 25 + + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + FlushIntervalRange: struct { + Min time.Duration + Max time.Duration + }{Min: time.Second, Max: time.Second}, + AgentURL: "http://localhost:8126", + + // Empty transport to avoid sending data to the agent + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + roundTrip: func(_ *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + }, + } + + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentLogs; j++ { + for k := 0; k < nbSameLogs; k++ { + c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length"+strconv.Itoa(i), WithTags(map[string]string{"key": strconv.Itoa(j)})) + } + } + }() + } + + wg.Wait() + } + + b.Log("Called (*client).Log ", nbGoroutines*nbDifferentLogs*nbSameLogs, " times") +} diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index 3a11a7fbda..f250f8d077 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -15,24 +15,35 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) -type LogOption func(key *loggerKey) +type LogOption func(key *loggerKey, value *loggerValue) +// WithTags returns a LogOption that sets the tags for the telemetry log message. Tags are key-value pairs that are then +// serialized into a simple "key:value,key2:value2" format. 
No quoting or escaping is performed. func WithTags(tags map[string]string) LogOption { compiledTags := "" for k, v := range tags { compiledTags += k + ":" + v + "," } compiledTags = strings.TrimSuffix(compiledTags, ",") - return func(key *loggerKey) { + return func(key *loggerKey, _ *loggerValue) { + if key == nil { + return + } key.tags = compiledTags } } +// WithStacktrace returns a LogOption that sets the stacktrace for the telemetry log message. The stacktrace is a string +// that is generated inside the WithStacktrace function. Logs demultiplication does not take the stacktrace into account. +// This means that a log that has been demultiplicated will only show of the first log. func WithStacktrace() LogOption { buf := make([]byte, 1<<12) - runtime.Stack(buf, false) - return func(key *loggerKey) { - key.stacktrace = string(buf) + buf = buf[:runtime.Stack(buf, false)] + return func(_ *loggerKey, value *loggerValue) { + if value == nil { + return + } + value.stacktrace = string(buf) } } @@ -45,15 +56,15 @@ const ( ) type loggerKey struct { - tags string - message string - level LogLevel - stacktrace string + tags string + message string + level LogLevel } type loggerValue struct { - count atomic.Uint32 - time int64 // Unix timestamp + count atomic.Uint32 + stacktrace string + time int64 // Unix timestamp } type logger struct { @@ -75,7 +86,7 @@ func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { } for _, opt := range opts { - opt(&key) + opt(&key, nil) } val, ok := store.Load(key) @@ -88,7 +99,11 @@ func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { time: time.Now().Unix(), } - newVal.count.Add(1) + for _, opt := range opts { + opt(&key, newVal) + } + + newVal.count.Store(1) store.Store(key, newVal) } @@ -107,7 +122,7 @@ func (logger *logger) Payload() transport.Payload { Level: k.level, Tags: k.tags, Count: v.count.Load(), - StackTrace: k.stacktrace, + StackTrace: v.stacktrace, TracerTime: v.time, }) return true From 214c2a7d1abaf74c3f866e0cff3b37f3a1cf8ce8 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 10:36:46 +0100 Subject: [PATCH 22/61] use internal.RecordWriter instead of the testWriter Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_test.go | 160 +++++++++++++---------- internal/newtelemetry/internal/writer.go | 14 +- 2 files changed, 100 insertions(+), 74 deletions(-) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 6225a10dff..0256cf7f87 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -80,15 +80,6 @@ func TestNewClient(t *testing.T) { } } -type testWriter struct { - flush func(transport.Payload) -} - -func (w *testWriter) Flush(payloads transport.Payload) (int, error) { - w.flush(payloads) - return 1, nil -} - func TestClientFlush(t *testing.T) { tracerConfig := internal.TracerConfig{ Service: "test-service", @@ -99,14 +90,15 @@ func TestClientFlush(t *testing.T) { name string clientConfig ClientConfig when func(c *client) - expect func(*testing.T, transport.Payload) + expect func(*testing.T, []transport.Payload) }{ { name: "heartbeat", clientConfig: ClientConfig{ HeartbeatInterval: time.Nanosecond, }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppHeartbeat{}, payload) assert.Equal(t, payload.RequestType(), transport.RequestTypeAppHeartbeat) }, @@ -120,7 +112,8 @@ func 
TestClientFlush(t *testing.T) { when: func(c *client) { c.AddAppConfig("key", "value", types.OriginDefault) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) require.Len(t, batch.Payload, 2) @@ -139,7 +132,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) require.Len(t, batch.Payload, 2) @@ -155,7 +149,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.AddAppConfig("key", "value", types.OriginDefault) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppClientConfigurationChange{}, payload) config := payload.(transport.AppClientConfigurationChange) assert.Len(t, config.Configuration, 1) @@ -169,7 +164,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.AddAppConfig("key", "value", types.OriginDefault) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppClientConfigurationChange{}, payload) config := payload.(transport.AppClientConfigurationChange) assert.Len(t, config.Configuration, 1) @@ -183,7 +179,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.ProductStarted("test-product") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) @@ -195,7 +192,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.ProductStartError("test-product", errors.New("test-error")) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) @@ -208,7 +206,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.ProductStopped("test-product") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) @@ -220,7 +219,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppIntegrationChange{}, payload) integrationChange := payload.(transport.AppIntegrationChange) assert.Len(t, integrationChange.Integrations, 1) @@ -234,7 +234,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { 
c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0", Error: "test-error"}) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppIntegrationChange{}, payload) integrationChange := payload.(transport.AppIntegrationChange) assert.Len(t, integrationChange.Integrations, 1) @@ -250,7 +251,8 @@ func TestClientFlush(t *testing.T) { c.ProductStarted("test-product") c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) assert.Len(t, batch.Payload, 2) @@ -281,7 +283,8 @@ func TestClientFlush(t *testing.T) { c.ProductStarted("test-product") c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) assert.Len(t, batch.Payload, 3) @@ -310,7 +313,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.appStart() }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppStarted{}, payload) appStart := payload.(transport.AppStarted) assert.Equal(t, appStart.InstallSignature.InstallID, globalconfig.InstrumentationInstallID()) @@ -324,7 +328,8 @@ func TestClientFlush(t *testing.T) { c.appStart() c.ProductStarted("test-product") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppStarted{}, payload) appStart := payload.(transport.AppStarted) assert.Equal(t, appStart.Products[types.Namespace("test-product")].Enabled, true) @@ -336,7 +341,8 @@ func TestClientFlush(t *testing.T) { c.appStart() c.AddAppConfig("key", "value", types.OriginDefault) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppStarted{}, payload) appStart := payload.(transport.AppStarted) require.Len(t, appStart.Configuration, 1) @@ -350,19 +356,21 @@ func TestClientFlush(t *testing.T) { c.appStart() c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, - expect: func(t *testing.T, payload transport.Payload) { - switch p := payload.(type) { - case transport.AppStarted: - assert.Equal(t, globalconfig.InstrumentationInstallID(), p.InstallSignature.InstallID) - assert.Equal(t, globalconfig.InstrumentationInstallType(), p.InstallSignature.InstallType) - assert.Equal(t, globalconfig.InstrumentationInstallTime(), p.InstallSignature.InstallTime) - case transport.AppIntegrationChange: - assert.Len(t, p.Integrations, 1) - assert.Equal(t, p.Integrations[0].Name, "test-integration") - assert.Equal(t, p.Integrations[0].Version, "1.0.0") - default: - t.Fatalf("unexpected payload type: %T", p) - } + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := 
payload.(transport.AppStarted) + assert.Equal(t, globalconfig.InstrumentationInstallID(), appStart.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), appStart.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), appStart.InstallSignature.InstallTime) + + payload = payloads[1] + require.IsType(t, transport.AppIntegrationChange{}, payload) + p := payload.(transport.AppIntegrationChange) + + assert.Len(t, p.Integrations, 1) + assert.Equal(t, p.Integrations[0].Name, "test-integration") + assert.Equal(t, p.Integrations[0].Version, "1.0.0") }, }, { @@ -373,16 +381,16 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.appStart() }, - expect: func(t *testing.T, payload transport.Payload) { - switch p := payload.(type) { - case transport.AppStarted: - assert.Equal(t, globalconfig.InstrumentationInstallID(), p.InstallSignature.InstallID) - assert.Equal(t, globalconfig.InstrumentationInstallType(), p.InstallSignature.InstallType) - assert.Equal(t, globalconfig.InstrumentationInstallTime(), p.InstallSignature.InstallTime) - case transport.AppHeartbeat: - default: - t.Fatalf("unexpected payload type: %T", p) - } + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppStarted{}, payload) + appStart := payload.(transport.AppStarted) + assert.Equal(t, globalconfig.InstrumentationInstallID(), appStart.InstallSignature.InstallID) + assert.Equal(t, globalconfig.InstrumentationInstallType(), appStart.InstallSignature.InstallType) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), appStart.InstallSignature.InstallTime) + + payload = payloads[1] + require.IsType(t, transport.AppHeartbeat{}, payload) }, }, { @@ -390,7 +398,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.appStop() }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppClosing{}, payload) }, }, @@ -407,7 +416,8 @@ func TestClientFlush(t *testing.T) { }, true }, }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppDependenciesLoaded{}, payload) deps := payload.(transport.AppDependenciesLoaded) @@ -436,7 +446,8 @@ func TestClientFlush(t *testing.T) { }, true }, }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.AppDependenciesLoaded{}, payload) deps := payload.(transport.AppDependenciesLoaded) @@ -461,7 +472,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogDebug, "test") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -474,7 +486,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogWarn, "test") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -487,7 +500,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogError, "test") }, - 
expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -502,7 +516,8 @@ func TestClientFlush(t *testing.T) { c.Log(LogError, "test") c.Log(LogError, "test") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -516,7 +531,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogError, "test", WithTags(map[string]string{"key": "value"})) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -530,7 +546,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -547,7 +564,8 @@ func TestClientFlush(t *testing.T) { c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) c.Log(LogError, "test") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 2) @@ -574,7 +592,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogError, "test", WithStacktrace()) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -588,7 +607,8 @@ func TestClientFlush(t *testing.T) { when: func(c *client) { c.Log(LogError, "test", WithStacktrace(), WithTags(map[string]string{"key": "value", "key2": "value2"})) }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) @@ -608,7 +628,8 @@ func TestClientFlush(t *testing.T) { c.Log(LogWarn, "test") c.Log(LogDebug, "test") }, - expect: func(t *testing.T, payload transport.Payload) { + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 3) @@ -638,22 +659,17 @@ func TestClientFlush(t *testing.T) { require.NoError(t, err) defer c.Close() - var writerFlushCalled bool - - c.writer = &testWriter{ - flush: func(payload transport.Payload) { - writerFlushCalled = true - if test.expect != nil { - test.expect(t, payload) - } - }, - } + recordWriter := &internal.RecordWriter{} + c.writer = recordWriter if test.when != nil { test.when(c) } c.Flush() - require.Truef(t, writerFlushCalled, "expected writer.Flush() to be called") + + payloads := 
recordWriter.Payloads() + require.LessOrEqual(t, 1, len(payloads)) + test.expect(t, payloads) }) } } diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 948348fcd1..9204fc3eb2 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -253,9 +253,19 @@ type RecordWriter struct { payloads []transport.Payload } -func (w *RecordWriter) Flush(payload transport.Payload) error { +func (w *RecordWriter) Flush(payload transport.Payload) (int, error) { w.mu.Lock() defer w.mu.Unlock() w.payloads = append(w.payloads, payload) - return nil + return 1, nil } + +func (w *RecordWriter) Payloads() []transport.Payload { + w.mu.Lock() + defer w.mu.Unlock() + copyPayloads := make([]transport.Payload, len(w.payloads)) + copy(copyPayloads, w.payloads) + return copyPayloads +} + +var _ Writer = (*RecordWriter)(nil) From f84763125c1d6ea4d4673c4dc8923a4de903cad1 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 11:22:21 +0100 Subject: [PATCH 23/61] refactor the writer and flush method to record more data that will then be used as telemetry metrics Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 41 +++++++----- internal/newtelemetry/internal/writer.go | 84 +++++++++++++++++------- 2 files changed, 87 insertions(+), 38 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index b7a590a4b4..630628246c 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -185,13 +185,19 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { c.flushMapperMu.Unlock() } + if c.payloadQueue.IsEmpty() && len(payloads) == 0 { + c.flushTicker.DecreaseSpeed() + return 0, nil + } + c.payloadQueue.Enqueue(payloads...) payloads = c.payloadQueue.GetBuffer() defer c.payloadQueue.ReleaseBuffer(payloads) var ( - nbBytes int - nonFatalErros []error + nbBytes int + speedIncreased bool + failedCalls []internal.EndpointRequestResult ) for i, payload := range payloads { @@ -199,33 +205,36 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { continue } - nbBytesOfPayload, err := c.writer.Flush(payload) - if nbBytes > 0 && err != nil { - nonFatalErros = append(nonFatalErros, err) - err = nil - } - + results, err := c.writer.Flush(payload) if err != nil { - // We stop flushing when we encounter a fatal error, put the payloads in the + // We stop flushing when we encounter a fatal error, put the payloads in the queue and return the error log.Error("error while flushing telemetry data: %v", err) c.payloadQueue.Enqueue(payloads[i:]...) return nbBytes, err } - if nbBytesOfPayload > c.clientConfig.EarlyFlushPayloadSize { + failedCalls = append(failedCalls, results[:len(results)-1]...) 
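+		// writer.Flush tries each configured endpoint in order and returns one
+		// EndpointRequestResult per attempt. When err is nil, only the last entry
+		// is the successful call; any preceding entries describe endpoints that
+		// failed before the fallback succeeded and are kept solely so they can be
+		// logged as non-fatal errors below.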
+ successfulCall := results[len(results)-1] + + if !speedIncreased && successfulCall.PayloadByteSize > c.clientConfig.EarlyFlushPayloadSize { // We increase the speed of the flushTicker to try to flush the remaining payloads faster as we are at risk of sending too large payloads to the backend c.flushTicker.IncreaseSpeed() + speedIncreased = true } - nbBytes += nbBytesOfPayload + nbBytes += successfulCall.PayloadByteSize } - if len(nonFatalErros) > 0 { - err := "error" - if len(nonFatalErros) > 1 { - err = "errors" + if len(failedCalls) > 0 { + errName := "error" + if len(failedCalls) > 1 { + errName = "errors" + } + var errs []error + for _, call := range failedCalls { + errs = append(errs, call.Error) } - log.Debug("non-fatal %s while flushing telemetry data: %v", err, errors.Join(nonFatalErros...)) + log.Debug("non-fatal %s while flushing telemetry data: %v", errName, errors.Join(errs...)) } return nbBytes, nil diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 9204fc3eb2..e2302c9929 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -85,10 +85,19 @@ func newBody(config TracerConfig, debugMode bool) *transport.Body { // The telemetry data is sent as a JSON payload as described in the API documentation. type Writer interface { // Flush does a synchronous call to the telemetry endpoint with the given payload. Thread-safe. - // It returns the number of bytes sent and an error if any. - // Keep in mind that errors can be returned even if the payload was sent successfully. - // Please check if the number of bytes sent is greater than 0 to know if the payload was sent. - Flush(transport.Payload) (int, error) + // It returns a non-empty [EndpointRequestResult] slice and a nil error if the payload was sent successfully. + // Otherwise, the error is a call to [errors.Join] on all errors that occurred. + Flush(transport.Payload) ([]EndpointRequestResult, error) +} + +// EndpointRequestResult is returned by the Flush method of the Writer interface. +type EndpointRequestResult struct { + // Error is the error that occurred when sending the payload to the endpoint. This is nil if the payload was sent successfully. + Error error + // PayloadByteSize is the number of bytes that were sent to the endpoint, zero if the payload was not sent. + PayloadByteSize int + // CallDuration is the duration of the call to the endpoint if the call was successful + CallDuration time.Duration } type writer struct { @@ -202,49 +211,75 @@ func (w *writer) newRequest(endpoint *http.Request, payload transport.Payload) * // SumReaderCloser is a ReadCloser that wraps another ReadCloser and counts the number of bytes read. type SumReaderCloser struct { io.ReadCloser - n *int + n int } func (s *SumReaderCloser) Read(p []byte) (n int, err error) { n, err = s.ReadCloser.Read(p) - *s.n += n + s.n += n return } -func (w *writer) Flush(payload transport.Payload) (int, error) { +// WriterStatusCodeError is an error that is returned when the writer receives an unexpected status code from the server. 
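+// The rendered message has the following shape (values illustrative, not taken from a real run):
+//
+//	unexpected status code: "413 Request Entity Too Large" (received body: "payload too large")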
+type WriterStatusCodeError struct { + StatusCode string + Body string +} + +func (w *WriterStatusCodeError) Error() string { + return fmt.Sprintf("unexpected status code: %q (received body: %q)", w.StatusCode, w.Body) +} + +func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, error) { w.mu.Lock() defer w.mu.Unlock() - var ( - errs []error - sumRead int - ) + var results []EndpointRequestResult for _, endpoint := range w.endpoints { - request := w.newRequest(endpoint, payload) - request.Body = &SumReaderCloser{ReadCloser: request.Body, n: &sumRead} + var ( + request = w.newRequest(endpoint, payload) + sumReaderCloser = &SumReaderCloser{ReadCloser: request.Body} + now = time.Now() + ) + + request.Body = sumReaderCloser response, err := w.httpClient.Do(request) if err != nil { - errs = append(errs, fmt.Errorf("telemetry/writer: %w", err)) - sumRead = 0 + results = append(results, EndpointRequestResult{Error: err}) continue } - // Currently we have a maximum of 3 endpoints so we can afford to close bodies at the end of the function - //goland:noinspection GoDeferInLoop + // We only have a few endpoints, so we can afford to keep the response body stream open until we are done with it defer response.Body.Close() if response.StatusCode >= 300 || response.StatusCode < 200 { respBodyBytes, _ := io.ReadAll(response.Body) // maybe we can find an error reason in the response body - errs = append(errs, fmt.Errorf("telemetry/writer: unexpected status code: %q (received body: %q)", response.Status, string(respBodyBytes))) - sumRead = 0 + results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ + StatusCode: response.Status, + Body: string(respBodyBytes), + }}) continue } + results = append(results, EndpointRequestResult{ + PayloadByteSize: sumReaderCloser.n, + CallDuration: time.Since(now), + }) + // We succeeded, no need to try the other endpoints break } - return sumRead, errors.Join(errs...) + var err error + if results[len(results)-1].Error != nil { + var errs []error + for _, result := range results { + errs = append(errs, result.Error) + } + err = errors.Join(errs...) + } + + return results, err } // RecordWriter is a Writer that stores the payloads in memory. 
Used for testing purposes @@ -253,11 +288,16 @@ type RecordWriter struct { payloads []transport.Payload } -func (w *RecordWriter) Flush(payload transport.Payload) (int, error) { +func (w *RecordWriter) Flush(payload transport.Payload) ([]EndpointRequestResult, error) { w.mu.Lock() defer w.mu.Unlock() w.payloads = append(w.payloads, payload) - return 1, nil + return []EndpointRequestResult{ + { + PayloadByteSize: 1, + CallDuration: time.Nanosecond, + }, + }, nil } func (w *RecordWriter) Payloads() []transport.Payload { From 3ebfb63b390bf05cc2bede528c3cb10cf478c0f8 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 11:46:51 +0100 Subject: [PATCH 24/61] added replay feature to the metrics function of the global client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/globalclient.go | 94 +++++++++++++++------- internal/newtelemetry/internal/recorder.go | 6 ++ 2 files changed, 69 insertions(+), 31 deletions(-) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index abd80e60f5..e7c0472a78 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -6,6 +6,7 @@ package newtelemetry import ( + "sync" "sync/atomic" globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" @@ -18,6 +19,10 @@ var ( // globalClientRecorder contains all actions done on the global client done before StartApp() with an actual client object is called globalClientRecorder = internal.NewRecorder[Client]() + + // metricsHandleHotPointers contains all the metricsHotPointer, used to replay actions done before the actual MetricHandle is set + metricsHandleHotPointers []metricsHotPointer + metricsHandleHotPointersMu sync.Mutex ) // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). @@ -41,6 +46,14 @@ func SwapClient(client Client) { if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { (*oldClient).Close() + + // Swap all metrics hot pointers to the actual MetricHandle + metricsHandleHotPointersMu.Lock() + defer metricsHandleHotPointersMu.Unlock() + for i := range metricsHandleHotPointers { + hotPointer := &metricsHandleHotPointers[i] + hotPointer.swap(hotPointer.maker(client)) + } } } @@ -57,62 +70,67 @@ func StopApp() { } } +var telemetryClientDisabled = globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) + // Disabled returns whether instrumentation telemetry is disabled // according to the DD_INSTRUMENTATION_TELEMETRY_ENABLED env var func Disabled() bool { - return !globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) + return telemetryClientDisabled } // Count creates a new metric handle for the given parameters that can be used to submit values. +// Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. +// The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - if Disabled() { - return nil - } - - client := globalClient.Load() - if client == nil || *client == nil { - return nil - } - return (*client).Count(namespace, name, tags) + return newMetric(func(client Client) MetricHandle { + return client.Count(namespace, name, tags) + }) } // Rate creates a new metric handle for the given parameters that can be used to submit values. 
+// Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. +// The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - if Disabled() { - return nil - } - - client := globalClient.Load() - if client == nil || *client == nil { - return nil - } - return (*client).Rate(namespace, name, tags) + return newMetric(func(client Client) MetricHandle { + return client.Rate(namespace, name, tags) + }) } // Gauge creates a new metric handle for the given parameters that can be used to submit values. +// Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. +// The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - if Disabled() { - return nil - } - - client := globalClient.Load() - if client == nil || *client == nil { - return nil - } - return (*client).Gauge(namespace, name, tags) + return newMetric(func(client Client) MetricHandle { + return client.Gauge(namespace, name, tags) + }) } // Distribution creates a new metric handle for the given parameters that can be used to submit values. +// Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. +// The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return newMetric(func(client Client) MetricHandle { + return client.Distribution(namespace, name, tags) + }) +} + +func newMetric(maker func(client Client) MetricHandle) MetricHandle { if Disabled() { - return nil + // Act as a noop if telemetry is disabled + return &metricsHotPointer{} } client := globalClient.Load() + wrapper := newMetricsHotPointer(maker) if client == nil || *client == nil { - return nil + wrapper.recorder = internal.NewRecorder[MetricHandle]() } - return (*client).Distribution(namespace, name, tags) + + globalClientCall(func(client Client) { + wrapper.swap(maker(client)) + }) + + return wrapper } func Log(level LogLevel, text string, options ...LogOption) { @@ -181,12 +199,26 @@ func globalClientCall(fun func(client Client)) { fun(*client) } +func newMetricsHotPointer(maker func(client Client) MetricHandle) *metricsHotPointer { + metricsHandleHotPointersMu.Lock() + defer metricsHandleHotPointersMu.Unlock() + + metricsHandleHotPointers = append(metricsHandleHotPointers, metricsHotPointer{maker: maker}) + return &metricsHandleHotPointers[len(metricsHandleHotPointers)-1] +} + +// metricsHotPointer is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. 
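+// A rough lifecycle sketch (the namespace and metric name below are illustrative):
+//
+//	h := Count(types.Namespace("tracers"), "spans_created", nil) // client not started yet: h is a metricsHotPointer
+//	h.Submit(1)                                                   // buffered through the recorder while ptr is still nil
+//	StartApp(client)                                              // swap() installs the real MetricHandle and replays the buffered Submit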
type metricsHotPointer struct { ptr atomic.Pointer[MetricHandle] recorder internal.Recorder[MetricHandle] + maker func(client Client) MetricHandle } func (t *metricsHotPointer) Submit(value float64) { + if Disabled() { + return + } + inner := t.ptr.Load() if inner == nil || *inner == nil { t.recorder.Record(func(handle MetricHandle) { diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index 985708d9c5..76923377e1 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -23,12 +23,18 @@ func NewRecorder[T any]() Recorder[T] { } func (r Recorder[T]) Record(f func(T)) { + if r.queue == nil { + return + } if !r.queue.Enqueue(f) { log.Debug("telemetry: recorder queue is full, dropping record") } } func (r Recorder[T]) Replay(t T) { + if r.queue == nil { + return + } for { f := r.queue.Dequeue() if f == nil { From f9dd56c428aea5ff8b680f8f20492a7fc0ce85ef Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 14:23:11 +0100 Subject: [PATCH 25/61] add internal package for known metrics from the backend Signed-off-by: Eliott Bouhana --- .../internal/metrics/common_metrics.json | 174 ++++++++++++++++++ .../internal/metrics/generator/generator.go | 143 ++++++++++++++ .../internal/metrics/golang_metrics.json | 3 + .../internal/metrics/known_metrics.go | 53 ++++++ 4 files changed, 373 insertions(+) create mode 100644 internal/newtelemetry/internal/metrics/common_metrics.json create mode 100644 internal/newtelemetry/internal/metrics/generator/generator.go create mode 100644 internal/newtelemetry/internal/metrics/golang_metrics.json create mode 100644 internal/newtelemetry/internal/metrics/known_metrics.go diff --git a/internal/newtelemetry/internal/metrics/common_metrics.json b/internal/newtelemetry/internal/metrics/common_metrics.json new file mode 100644 index 0000000000..56568f15e9 --- /dev/null +++ b/internal/newtelemetry/internal/metrics/common_metrics.json @@ -0,0 +1,174 @@ +[ + "code_coverage.errors", + "code_coverage.files", + "code_coverage.is_empty", + "code_coverage_finished", + "code_coverage_started", + "context_header.truncated", + "context_header_style.extracted", + "context_header_style.injected", + "docker_lib_injection.failure", + "docker_lib_injection.success", + "early_flake_detection.request", + "early_flake_detection.request_errors", + "early_flake_detection.request_ms", + "early_flake_detection.response_bytes", + "early_flake_detection.response_tests", + "endpoint_payload.bytes", + "endpoint_payload.dropped", + "endpoint_payload.events_count", + "endpoint_payload.events_serialization_ms", + "endpoint_payload.requests", + "endpoint_payload.requests_errors", + "endpoint_payload.requests_ms", + "evaluators.error", + "evaluators.init", + "evaluators.rule_sample_rate", + "evaluators.run", + "event_created", + "event_finished", + "events_enqueued_for_serialization", + "executed.propagation", + "executed.sink", + "executed.source", + "executed.tainted", + "exporter_fallback", + "flaky_tests.request", + "flaky_tests.request_errors", + "flaky_tests.request_ms", + "flaky_tests.response_bytes", + "flaky_tests.response_tests", + "git.command", + "git.command_errors", + "git.command_ms", + "git_requests.objects_pack", + "git_requests.objects_pack_bytes", + "git_requests.objects_pack_errors", + "git_requests.objects_pack_files", + "git_requests.objects_pack_ms", + "git_requests.search_commits", + "git_requests.search_commits_errors", + "git_requests.search_commits_ms", + 
"git_requests.settings", + "git_requests.settings_errors", + "git_requests.settings_ms", + "git_requests.settings_response", + "host_lib_injection.failure", + "host_lib_injection.success", + "impacted_tests_detection.request", + "impacted_tests_detection.request_errors", + "impacted_tests_detection.request_ms", + "impacted_tests_detection.response_bytes", + "impacted_tests_detection.response_files", + "init_time", + "inject.error", + "inject.language_detection", + "inject.latency.baseline", + "inject.latency.end_to_end", + "inject.latency.init_container", + "inject.skip", + "inject.success", + "injection.content_security_policy", + "injection.failed", + "injection.initialization.failed", + "injection.initialization.succeed", + "injection.installation", + "injection.installation.duration", + "injection.ms", + "injection.response.bytes", + "injection.skipped", + "injection.succeed", + "instrum.user_auth.missing_user_id", + "instrum.user_auth.missing_user_login", + "instrumented.propagation", + "instrumented.sink", + "instrumented.source", + "integration_errors", + "itr_forced_run", + "itr_skippable_tests.request", + "itr_skippable_tests.request_errors", + "itr_skippable_tests.request_ms", + "itr_skippable_tests.response_bytes", + "itr_skippable_tests.response_suites", + "itr_skippable_tests.response_tests", + "itr_skipped", + "itr_unskippable", + "json.tag.size.exceeded", + "k8s_lib_injection.failure", + "k8s_lib_injection.success", + "known_tests.request", + "known_tests.request_errors", + "known_tests.request_ms", + "known_tests.response_bytes", + "known_tests.response_tests", + "library_entrypoint.abort", + "library_entrypoint.abort.integration", + "library_entrypoint.abort.runtime", + "library_entrypoint.complete", + "library_entrypoint.error", + "library_entrypoint.injector.error", + "logs_created", + "manual_api_events", + "otel.env.hiding", + "otel.env.invalid", + "otel.env.unsupported", + "profile_api.bytes", + "profile_api.errors", + "profile_api.ms", + "profile_api.requests", + "profile_api.responses", + "rasp.rule.eval", + "rasp.rule.match", + "rasp.timeout", + "request.tainted", + "server.active_sessions", + "server.memory_usage", + "server.submitted_payloads", + "span.start", + "span_pointer_calculation", + "span_pointer_calculation.issue", + "spans_created", + "spans_dropped", + "spans_enqueued_for_serialization", + "spans_finished", + "ssi_heuristic.number_of_profiles", + "ssi_heuristic.number_of_runtime_id", + "stats_api.bytes", + "stats_api.errors", + "stats_api.ms", + "stats_api.requests", + "stats_api.responses", + "stats_buckets", + "suppressed.vulnerabilities", + "telemetry_api.bytes", + "telemetry_api.errors", + "telemetry_api.ms", + "telemetry_api.requests", + "telemetry_api.responses", + "test_session", + "trace_api.bytes", + "trace_api.errors", + "trace_api.ms", + "trace_api.requests", + "trace_api.responses", + "trace_chunk_serialization.bytes", + "trace_chunk_serialization.ms", + "trace_chunk_size", + "trace_chunks_dropped", + "trace_chunks_enqueued", + "trace_chunks_enqueued_for_serialization", + "trace_chunks_sent", + "trace_partial_flush.count", + "trace_partial_flush.spans_closed", + "trace_partial_flush.spans_remaining", + "trace_segments_closed", + "trace_segments_created", + "waf.config_errors", + "waf.duration", + "waf.duration_ext", + "waf.init", + "waf.input_truncated", + "waf.requests", + "waf.truncated_value_size", + "waf.updates" +] diff --git a/internal/newtelemetry/internal/metrics/generator/generator.go 
b/internal/newtelemetry/internal/metrics/generator/generator.go new file mode 100644 index 0000000000..419c66f009 --- /dev/null +++ b/internal/newtelemetry/internal/metrics/generator/generator.go @@ -0,0 +1,143 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + + "golang.org/x/exp/slices" +) + +// This represents the base64-encoded URL of api.github.com to download the configuration file. +// This can be easily decoded manually, but it is encoded to prevent the URL from being scanned by bots. +const ( + commonMetricsURL = "aHR0cHM6Ly9hcGkuZ2l0aHViLmNvbS9yZXBvcy9EYXRhRG9nL2RkLWdvL2NvbnRlbnRzL3RyYWNlL2FwcHMvdHJhY2VyLXRlbGVtZXRyeS1pbnRha2UvdGVsZW1ldHJ5LW1ldHJpY3Mvc3RhdGljL2NvbW1vbl9tZXRyaWNzLmpzb24=" + goMetricsURL = "aHR0cHM6Ly9hcGkuZ2l0aHViLmNvbS9yZXBvcy9EYXRhRG9nL2RkLWdvL2NvbnRlbnRzL3RyYWNlL2FwcHMvdHJhY2VyLXRlbGVtZXRyeS1pbnRha2UvdGVsZW1ldHJ5LW1ldHJpY3Mvc3RhdGljL2dvbGFuZ19tZXRyaWNzLmpzb24=" +) + +func base64Decode(encoded string) string { + decoded, _ := io.ReadAll(base64.NewDecoder(base64.StdEncoding, strings.NewReader(encoded))) + return string(decoded) +} + +func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []string) error { + request, err := http.NewRequest(http.MethodGet, remoteURL, nil) + if err != nil { + return err + } + + // Following the documentation described here: + // https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28 + + request.Header.Add("Authorization", "Bearer "+token) + request.Header.Add("Accept", "application/vnd.github.v3.raw") + request.Header.Add("X-GitHub-Api-Version", "2022-11-28") + request.URL.Query().Add("ref", branch) + + response, err := http.DefaultClient.Do(request) + if err != nil { + return err + } + + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code: %s", response.Status) + } + + var decoded map[string]any + + if err := json.NewDecoder(response.Body).Decode(&decoded); err != nil { + return err + } + + metricNames := getMetricNames(decoded) + slices.SortStableFunc(metricNames, strings.Compare) + + fp, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return err + } + + defer fp.Close() + + encoder := json.NewEncoder(fp) + encoder.SetIndent("", " ") + return encoder.Encode(metricNames) +} + +func getCommonMetricNames(input map[string]any) []string { + var names []string + for category, value := range input { + if strings.HasPrefix(category, "$") { + continue + } + + metrics := value.(map[string]any) + for metricKey := range metrics { + names = append(names, metricKey) + } + } + return names +} + +func getGoMetricNames(input map[string]any) []string { + var names []string + for key := range input { + if strings.HasPrefix(key, "$") { + continue + } + names = append(names, key) + } + return names +} + +func main() { + branch := flag.String("branch", "prod", "The branch to get the configuration from") + flag.Parse() + + githubToken := os.Getenv("GITHUB_TOKEN") + if githubToken == "" { + if _, err := exec.LookPath("gh"); err != nil { + fmt.Println("Please specify a GITHUB_TOKEN environment variable or install the GitHub 
CLI.") + os.Exit(2) + } + + var buf bytes.Buffer + cmd := exec.Command("gh", "auth", "token") + cmd.Stdout = &buf + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + fmt.Println("Failed to run `gh auth token`:", err) + os.Exit(1) + } + + githubToken = strings.TrimSpace(buf.String()) + } + + _, thisFile, _, _ := runtime.Caller(0) + dir := filepath.Dir(thisFile) + if err := downloadFromDdgo(base64Decode(commonMetricsURL), filepath.Join(dir, "..", "common_metrics.json"), *branch, githubToken, getCommonMetricNames); err != nil { + fmt.Println("Failed to download common metrics:", err) + os.Exit(1) + } + + if err := downloadFromDdgo(base64Decode(goMetricsURL), filepath.Join(dir, "..", "golang_metrics.json"), *branch, githubToken, getGoMetricNames); err != nil { + fmt.Println("Failed to download golang metrics:", err) + os.Exit(1) + } +} diff --git a/internal/newtelemetry/internal/metrics/golang_metrics.json b/internal/newtelemetry/internal/metrics/golang_metrics.json new file mode 100644 index 0000000000..54396b6e4a --- /dev/null +++ b/internal/newtelemetry/internal/metrics/golang_metrics.json @@ -0,0 +1,3 @@ +[ + "orchestrion.enabled" +] diff --git a/internal/newtelemetry/internal/metrics/known_metrics.go b/internal/newtelemetry/internal/metrics/known_metrics.go new file mode 100644 index 0000000000..b5e45c61e7 --- /dev/null +++ b/internal/newtelemetry/internal/metrics/known_metrics.go @@ -0,0 +1,53 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +//go:generate go run gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/metrics/generator + +package metrics + +import ( + _ "embed" + "encoding/json" + "slices" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" +) + +//go:embed common_metrics.json +var commonMetricsJSON []byte + +//go:embed golang_metrics.json +var golangMetricsJSON []byte + +var ( + commonMetrics = parseMetricNames(commonMetricsJSON) + golangMetrics = parseMetricNames(golangMetricsJSON) +) + +func parseMetricNames(bytes []byte) []string { + var names []string + if err := json.Unmarshal(bytes, &names); err != nil { + log.Error("telemetry: failed to parse metric names: %v", err) + } + return names +} + +// IsKnownMetricName returns true if the given metric name is a known metric by the backend +// This is linked to generated common_metrics.json file and golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. +func IsKnownMetricName(name string) bool { + return slices.Contains(commonMetrics, name) || slices.Contains(golangMetrics, name) +} + +// IsCommonMetricName returns true if the given metric name is a known common (cross-language) metric by the backend +// This is linked to the generated common_metrics.json file. If you added new metrics to the backend, you should rerun the generator. +func IsCommonMetricName(name string) bool { + return slices.Contains(commonMetrics, name) +} + +// IsLanguageMetricName returns true if the given metric name is a known Go language metric by the backend +// This is linked to the generated golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. 
+func IsLanguageMetricName(name string) bool { + return slices.Contains(golangMetrics, name) +} From 4cd8b71552fb8208be88e91f57a84fdd003385a4 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 15:32:12 +0100 Subject: [PATCH 26/61] rename metrics package to knownmetrics + added a new TypedSyncMap wrapper over sync.Map + added new metrics data source + added support for count metrics end 2 end Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 3 +- internal/newtelemetry/client.go | 23 +++-- internal/newtelemetry/globalclient.go | 20 ++-- .../common_metrics.json | 0 .../generator/generator.go | 0 .../golang_metrics.json | 0 .../known_metrics.go | 2 +- .../internal/transport/generate-metrics.go | 5 +- .../newtelemetry/internal/typedsyncmap.go | 53 +++++++++++ internal/newtelemetry/logger.go | 24 +++-- internal/newtelemetry/metrics.go | 92 +++++++++++++++++++ 11 files changed, 184 insertions(+), 38 deletions(-) rename internal/newtelemetry/internal/{metrics => knownmetrics}/common_metrics.json (100%) rename internal/newtelemetry/internal/{metrics => knownmetrics}/generator/generator.go (100%) rename internal/newtelemetry/internal/{metrics => knownmetrics}/golang_metrics.json (100%) rename internal/newtelemetry/internal/{metrics => knownmetrics}/known_metrics.go (98%) create mode 100644 internal/newtelemetry/internal/typedsyncmap.go create mode 100644 internal/newtelemetry/metrics.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 02fc7caf58..c3dfb08936 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -8,6 +8,7 @@ package newtelemetry import ( "io" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -21,7 +22,7 @@ import ( type MetricHandle interface { Submit(value float64) - flush() + payload() transport.MetricData } // Integration is an integration that is configured to be traced. 
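A minimal sketch of the metrics path this patch wires up, assuming an agent listening on the default local URL; the namespace value and tag are illustrative and error handling is elided:

	c, _ := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"})
	defer c.Close()

	// Count returns a MetricHandle backed by metrics.LoadOrStore; repeated calls with the
	// same namespace, name and tags are expected to reuse the same handle.
	handle := c.Count(types.Namespace("tracers"), "spans_created", map[string]string{"integration": "net/http"})
	handle.Submit(1) // accumulated in-process and emitted on the next flush
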
diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 630628246c..af06c9415d 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -61,6 +61,9 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client dependencies: dependencies{ DependencyLoader: config.DependencyLoader, }, + metrics: metrics{ + skipAllowlist: config.Debug, + }, } client.dataSources = append(client.dataSources, @@ -74,6 +77,10 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client client.dataSources = append(client.dataSources, &client.logger) } + if config.MetricsEnabled { + client.dataSources = append(client.dataSources, &client.metrics) + } + client.flushTicker = internal.NewTicker(func() { client.Flush() }, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) @@ -92,6 +99,7 @@ type client struct { configuration configuration dependencies dependencies logger logger + metrics metrics dataSources []interface { Payload() transport.Payload } @@ -118,19 +126,16 @@ func (c *client) MarkIntegrationAsLoaded(integration Integration) { c.integrations.Add(integration) } -func (c *client) Count(_ types.Namespace, _ string, _ map[string]string) MetricHandle { - //TODO implement me - panic("implement me") +func (c *client) Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return c.metrics.LoadOrStore(namespace, transport.CountMetric, name, tags) } -func (c *client) Rate(_ types.Namespace, _ string, _ map[string]string) MetricHandle { - //TODO implement me - panic("implement me") +func (c *client) Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return c.metrics.LoadOrStore(namespace, transport.RateMetric, name, tags) } -func (c *client) Gauge(_ types.Namespace, _ string, _ map[string]string) MetricHandle { - //TODO implement me - panic("implement me") +func (c *client) Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { + return c.metrics.LoadOrStore(namespace, transport.GaugeMetric, name, tags) } func (c *client) Distribution(_ types.Namespace, _ string, _ map[string]string) MetricHandle { diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index e7c0472a78..2a4d153788 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -11,6 +11,7 @@ import ( globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) @@ -82,7 +83,7 @@ func Disabled() bool { // Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return newMetric(func(client Client) MetricHandle { + return globalClientNewMetric(func(client Client) MetricHandle { return client.Count(namespace, name, tags) }) } @@ -91,7 +92,7 @@ func Count(namespace types.Namespace, name string, tags map[string]string) Metri // Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. 
func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return newMetric(func(client Client) MetricHandle { + return globalClientNewMetric(func(client Client) MetricHandle { return client.Rate(namespace, name, tags) }) } @@ -100,7 +101,7 @@ func Rate(namespace types.Namespace, name string, tags map[string]string) Metric // Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return newMetric(func(client Client) MetricHandle { + return globalClientNewMetric(func(client Client) MetricHandle { return client.Gauge(namespace, name, tags) }) } @@ -109,12 +110,12 @@ func Gauge(namespace types.Namespace, name string, tags map[string]string) Metri // Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { - return newMetric(func(client Client) MetricHandle { + return globalClientNewMetric(func(client Client) MetricHandle { return client.Distribution(namespace, name, tags) }) } -func newMetric(maker func(client Client) MetricHandle) MetricHandle { +func globalClientNewMetric(maker func(client Client) MetricHandle) MetricHandle { if Disabled() { // Act as a noop if telemetry is disabled return &metricsHotPointer{} @@ -229,13 +230,8 @@ func (t *metricsHotPointer) Submit(value float64) { (*inner).Submit(value) } -func (t *metricsHotPointer) flush() { - inner := t.ptr.Load() - if inner == nil || *inner == nil { - return - } - - (*inner).flush() +func (t *metricsHotPointer) payload() transport.MetricData { + return transport.MetricData{} } func (t *metricsHotPointer) swap(handle MetricHandle) { diff --git a/internal/newtelemetry/internal/metrics/common_metrics.json b/internal/newtelemetry/internal/knownmetrics/common_metrics.json similarity index 100% rename from internal/newtelemetry/internal/metrics/common_metrics.json rename to internal/newtelemetry/internal/knownmetrics/common_metrics.json diff --git a/internal/newtelemetry/internal/metrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go similarity index 100% rename from internal/newtelemetry/internal/metrics/generator/generator.go rename to internal/newtelemetry/internal/knownmetrics/generator/generator.go diff --git a/internal/newtelemetry/internal/metrics/golang_metrics.json b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json similarity index 100% rename from internal/newtelemetry/internal/metrics/golang_metrics.json rename to internal/newtelemetry/internal/knownmetrics/golang_metrics.json diff --git a/internal/newtelemetry/internal/metrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go similarity index 98% rename from internal/newtelemetry/internal/metrics/known_metrics.go rename to internal/newtelemetry/internal/knownmetrics/known_metrics.go index b5e45c61e7..fbd3216785 100644 --- a/internal/newtelemetry/internal/metrics/known_metrics.go +++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go @@ -5,7 +5,7 @@ //go:generate go run gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/metrics/generator -package metrics +package 
knownmetrics import ( _ "embed" diff --git a/internal/newtelemetry/internal/transport/generate-metrics.go b/internal/newtelemetry/internal/transport/generate-metrics.go index 344f483ff5..f6e85aec8e 100644 --- a/internal/newtelemetry/internal/transport/generate-metrics.go +++ b/internal/newtelemetry/internal/transport/generate-metrics.go @@ -37,9 +37,10 @@ const ( type MetricData struct { Metric string `json:"metric"` // Points stores pairs of timestamps and values - Points [][2]float64 `json:"points"` + // This first value should be an int64 timestamp and the second should be a float64 value + Points [][2]any `json:"points"` // Interval is required only for gauge and rate metrics - Interval int `json:"interval,omitempty"` + Interval int64 `json:"interval,omitempty"` // Type cannot be of type distribution because there is a different payload for it Type MetricType `json:"type,omitempty"` Tags []string `json:"tags,omitempty"` diff --git a/internal/newtelemetry/internal/typedsyncmap.go b/internal/newtelemetry/internal/typedsyncmap.go new file mode 100644 index 0000000000..14f9c97682 --- /dev/null +++ b/internal/newtelemetry/internal/typedsyncmap.go @@ -0,0 +1,53 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "sync" +) + +type TypedSyncMap[K comparable, V any] struct { + sync.Map +} + +func (m *TypedSyncMap[K, V]) Load(key K) (value V, ok bool) { + v, ok := m.Map.Load(key) + if !ok { + return + } + value = v.(V) + return +} + +func (m *TypedSyncMap[K, V]) Range(f func(key K, value V) bool) { + m.Map.Range(func(key, value interface{}) bool { + return f(key.(K), value.(V)) + }) +} + +func (m *TypedSyncMap[K, V]) Len() int { + count := 0 + m.Map.Range(func(_, _ interface{}) bool { + count++ + return true + }) + return count +} + +func (m *TypedSyncMap[K, V]) LoadOrStore(key K, value V) (actual V, loaded bool) { + v, loaded := m.Map.LoadOrStore(key, value) + actual = v.(V) + return +} + +func (m *TypedSyncMap[K, V]) LoadAndDelete(key K) (value V, loaded bool) { + v, loaded := m.Map.LoadAndDelete(key) + if !loaded { + return + } + value = v.(V) + return +} diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index f250f8d077..7a13a50eed 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -8,10 +8,10 @@ package newtelemetry import ( "runtime" "strings" - "sync" "sync/atomic" "time" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) @@ -68,13 +68,13 @@ type loggerValue struct { } type logger struct { - store atomic.Pointer[sync.Map] + store atomic.Pointer[internal.TypedSyncMap[loggerKey, *loggerValue]] } func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { store := logger.store.Load() if store == nil { - store = new(sync.Map) + store = new(internal.TypedSyncMap[loggerKey, *loggerValue]) for logger.store.CompareAndSwap(nil, store) { continue } @@ -91,7 +91,7 @@ func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { val, ok := store.Load(key) if ok { - val.(*loggerValue).count.Add(1) + val.count.Add(1) return } @@ -114,16 +114,14 @@ func (logger *logger) Payload() transport.Payload { } var logs []transport.LogMessage - store.Range(func(key, value any) bool { - k := key.(loggerKey) 
- v := value.(*loggerValue) + store.Range(func(key loggerKey, value *loggerValue) bool { logs = append(logs, transport.LogMessage{ - Message: k.message, - Level: k.level, - Tags: k.tags, - Count: v.count.Load(), - StackTrace: v.stacktrace, - TracerTime: v.time, + Message: key.message, + Level: key.level, + Tags: key.tags, + Count: value.count.Load(), + StackTrace: value.stacktrace, + TracerTime: value.time, }) return true }) diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go new file mode 100644 index 0000000000..34b0f64c8d --- /dev/null +++ b/internal/newtelemetry/metrics.go @@ -0,0 +1,92 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "strings" + "time" + + "go.uber.org/atomic" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +type metricKey struct { + namespace types.Namespace + kind transport.MetricType + name string + tags string +} + +type metrics struct { + store internal.TypedSyncMap[metricKey, MetricHandle] + skipAllowlist bool // Debugging feature to skip the allowlist of known metrics +} + +// LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. +func (m *metrics) LoadOrStore(namespace types.Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { + compiledTags := "" + for k, v := range tags { + compiledTags += k + ":" + v + "," + } + key := metricKey{namespace: namespace, kind: kind, name: name, tags: strings.TrimSuffix(compiledTags, ",")} + handle, _ := m.store.LoadOrStore(key, newMetric(key)) + return handle +} + +func newMetric(key metricKey) MetricHandle { + switch key.kind { + case transport.CountMetric: + return &count{key: key} + default: + panic("unsupported metric type: " + key.kind) + } +} + +func (m *metrics) Payload() transport.Payload { + series := make([]transport.MetricData, 0, m.store.Len()) + m.store.Range(func(_ metricKey, handle MetricHandle) bool { + if payload := handle.payload(); payload.Metric != "" { + series = append(series, payload) + } + return true + }) + return transport.GenerateMetrics{Series: series, SkipAllowlist: m.skipAllowlist} +} + +type count struct { + key metricKey + submit atomic.Bool + value atomic.Float64 + timestamp atomic.Int64 +} + +func (c *count) Submit(value float64) { + // There is kind-of a race condition here, but it's not a big deal, as the value and the timestamp will be sufficiently close together + c.submit.Store(true) + c.value.Add(value) + c.timestamp.Store(time.Now().Unix()) +} + +func (c *count) payload() transport.MetricData { + if submit := c.submit.Swap(false); !submit { + return transport.MetricData{} + } + + return transport.MetricData{ + Metric: c.key.name, + Namespace: c.key.namespace, + Tags: strings.Split(c.key.tags, ","), + Type: c.key.kind, + Common: knownmetrics.IsCommonMetricName(c.key.name), + Points: [][2]any{ + {c.timestamp.Load(), c.value.Load()}, + }, + } +} From 7487c90345f976d54537be11d3636734a96aac44 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 28 Jan 2025 17:56:42 +0100 Subject: [PATCH 
27/61] moved the Origin type and Namespace type to the transport package, removed the types package and added aliases for both types in the newtelemetry packag. And finished implementing gauge and rate metrics Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 43 +++++-- internal/newtelemetry/client.go | 19 ++- internal/newtelemetry/client_test.go | 63 +++++++--- internal/newtelemetry/configuration.go | 3 +- internal/newtelemetry/globalclient.go | 19 ++- .../internal/knownmetrics/common_metrics.json | 3 + .../knownmetrics/generator/generator.go | 7 +- .../internal/knownmetrics/known_metrics.go | 2 +- .../internal/transport/app_product_change.go | 6 +- .../internal/transport/conf_key_value.go | 12 +- .../internal/transport/distributions.go | 4 +- .../internal/transport/generate-metrics.go | 14 +-- .../transport}/namespace.go | 6 +- .../newtelemetry/internal/transport/origin.go | 17 +++ internal/newtelemetry/metrics.go | 117 ++++++++++++++---- internal/newtelemetry/product.go | 7 +- internal/newtelemetry/types/origin.go | 46 ------- 17 files changed, 235 insertions(+), 153 deletions(-) rename internal/newtelemetry/{types => internal/transport}/namespace.go (74%) create mode 100644 internal/newtelemetry/internal/transport/origin.go delete mode 100644 internal/newtelemetry/types/origin.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index c3dfb08936..fdc3c57e6e 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -9,7 +9,30 @@ import ( "io" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" +) + +// Namespace describes a product to distinguish telemetry coming from +// different products used by the same application +type Namespace = transport.Namespace + +const ( + NamespaceGeneral = transport.NamespaceGeneral + NamespaceTracers = transport.NamespaceTracers + NamespaceProfilers = transport.NamespaceProfilers + NamespaceAppSec = transport.NamespaceAppSec + NamespaceIAST = transport.NamespaceIAST + NamespaceTelemetry = transport.NamespaceTelemetry +) + +// Origin describes the source of a configuration change +type Origin = transport.Origin + +const ( + OriginDefault = transport.OriginDefault + OriginCode = transport.OriginCode + OriginDDConfig = transport.OriginDDConfig + OriginEnvVar = transport.OriginEnvVar + OriginRemoteConfig = transport.OriginRemoteConfig ) // MetricHandle can be used to submit different values for the same metric. @@ -43,37 +66,37 @@ type Client interface { io.Closer // Count creates a new metric handle for the given parameters that can be used to submit values. - Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle + Count(namespace Namespace, name string, tags map[string]string) MetricHandle // Rate creates a new metric handle for the given parameters that can be used to submit values. - Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle + Rate(namespace Namespace, name string, tags map[string]string) MetricHandle // Gauge creates a new metric handle for the given parameters that can be used to submit values. - Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle + Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle // Distribution creates a new metric handle for the given parameters that can be used to submit values. 
- Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle + Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle // Log sends a telemetry log at the desired level with the given text and options. // Options include sending key-value pairs as tags, and a stack trace frozen from inside the Log function. Log(level LogLevel, text string, options ...LogOption) // ProductStarted declares a product to have started at the customer’s request - ProductStarted(product types.Namespace) + ProductStarted(product Namespace) // ProductStopped declares a product to have being stopped by the customer - ProductStopped(product types.Namespace) + ProductStopped(product Namespace) // ProductStartError declares that a product could not start because of the following error - ProductStartError(product types.Namespace, err error) + ProductStartError(product Namespace, err error) // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. - AddAppConfig(key string, value any, origin types.Origin) + AddAppConfig(key string, value any, origin Origin) // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. - AddBulkAppConfig(kvs map[string]any, origin types.Origin) + AddBulkAppConfig(kvs map[string]any, origin Origin) // MarkIntegrationAsLoaded marks an integration as loaded in the telemetry MarkIntegrationAsLoaded(integration Integration) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index af06c9415d..a0d99b242d 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -13,7 +13,6 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/mapper" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) // NewClient creates a new telemetry client with the given service, environment, and version and config. 
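With Namespace and Origin now aliased out of the transport package, call sites inside dd-trace-go only need the newtelemetry package itself. A usage sketch, not code from this patch (the package name tracer, the helper name reportStartupTelemetry and the "trace_sample_rate" key are hypothetical; Count, Submit, AddAppConfig and the constants are the ones shown in globalclient.go and api.go):

// Usage sketch only; assumes a caller inside dd-trace-go importing the
// internal telemetry package. Names marked hypothetical are illustrative.
package tracer // hypothetical caller package

import telemetry "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry"

func reportStartupTelemetry() {
	// No more internal/newtelemetry/types import: the aliases cover it.
	telemetry.Count(telemetry.NamespaceTracers, "spans_created", nil).Submit(1)
	telemetry.AddAppConfig("trace_sample_rate", 0.5, telemetry.OriginEnvVar) // hypothetical config key
}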
@@ -126,40 +125,40 @@ func (c *client) MarkIntegrationAsLoaded(integration Integration) { c.integrations.Add(integration) } -func (c *client) Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Count(namespace Namespace, name string, tags map[string]string) MetricHandle { return c.metrics.LoadOrStore(namespace, transport.CountMetric, name, tags) } -func (c *client) Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Rate(namespace Namespace, name string, tags map[string]string) MetricHandle { return c.metrics.LoadOrStore(namespace, transport.RateMetric, name, tags) } -func (c *client) Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle { return c.metrics.LoadOrStore(namespace, transport.GaugeMetric, name, tags) } -func (c *client) Distribution(_ types.Namespace, _ string, _ map[string]string) MetricHandle { +func (c *client) Distribution(_ Namespace, _ string, _ map[string]string) MetricHandle { //TODO implement me panic("implement me") } -func (c *client) ProductStarted(product types.Namespace) { +func (c *client) ProductStarted(product Namespace) { c.products.Add(product, true, nil) } -func (c *client) ProductStopped(product types.Namespace) { +func (c *client) ProductStopped(product Namespace) { c.products.Add(product, false, nil) } -func (c *client) ProductStartError(product types.Namespace, err error) { +func (c *client) ProductStartError(product Namespace, err error) { c.products.Add(product, false, err) } -func (c *client) AddAppConfig(key string, value any, origin types.Origin) { +func (c *client) AddAppConfig(key string, value any, origin Origin) { c.configuration.Add(key, value, origin) } -func (c *client) AddBulkAppConfig(kvs map[string]any, origin types.Origin) { +func (c *client) AddBulkAppConfig(kvs map[string]any, origin Origin) { for key, value := range kvs { c.configuration.Add(key, value, origin) } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 0256cf7f87..88b646ed77 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -25,7 +25,6 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" "gopkg.in/DataDog/dd-trace-go.v1/internal/version" ) @@ -110,7 +109,7 @@ func TestClientFlush(t *testing.T) { ExtendedHeartbeatInterval: time.Nanosecond, }, when: func(c *client) { - c.AddAppConfig("key", "value", types.OriginDefault) + c.AddAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -147,7 +146,7 @@ func TestClientFlush(t *testing.T) { { name: "configuration-default", when: func(c *client) { - c.AddAppConfig("key", "value", types.OriginDefault) + c.AddAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -156,13 +155,13 @@ func TestClientFlush(t *testing.T) { assert.Len(t, config.Configuration, 1) assert.Equal(t, config.Configuration[0].Name, "key") assert.Equal(t, config.Configuration[0].Value, "value") - assert.Equal(t, config.Configuration[0].Origin, 
types.OriginDefault) + assert.Equal(t, config.Configuration[0].Origin, OriginDefault) }, }, { name: "configuration-default", when: func(c *client) { - c.AddAppConfig("key", "value", types.OriginDefault) + c.AddAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -171,7 +170,7 @@ func TestClientFlush(t *testing.T) { assert.Len(t, config.Configuration, 1) assert.Equal(t, config.Configuration[0].Name, "key") assert.Equal(t, config.Configuration[0].Value, "value") - assert.Equal(t, config.Configuration[0].Origin, types.OriginDefault) + assert.Equal(t, config.Configuration[0].Origin, OriginDefault) }, }, { @@ -184,7 +183,7 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) - assert.True(t, productChange.Products[types.Namespace("test-product")].Enabled) + assert.True(t, productChange.Products[Namespace("test-product")].Enabled) }, }, { @@ -197,8 +196,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) - assert.False(t, productChange.Products[types.Namespace("test-product")].Enabled) - assert.Equal(t, "test-error", productChange.Products[types.Namespace("test-product")].Error.Message) + assert.False(t, productChange.Products[Namespace("test-product")].Enabled) + assert.Equal(t, "test-error", productChange.Products[Namespace("test-product")].Error.Message) }, }, { @@ -211,7 +210,7 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.AppProductChange{}, payload) productChange := payload.(transport.AppProductChange) assert.Len(t, productChange.Products, 1) - assert.False(t, productChange.Products[types.Namespace("test-product")].Enabled) + assert.False(t, productChange.Products[Namespace("test-product")].Enabled) }, }, { @@ -261,7 +260,7 @@ func TestClientFlush(t *testing.T) { case transport.AppProductChange: assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) assert.Len(t, p.Products, 1) - assert.True(t, p.Products[types.Namespace("test-product")].Enabled) + assert.True(t, p.Products[Namespace("test-product")].Enabled) case transport.AppIntegrationChange: assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) assert.Len(t, p.Integrations, 1) @@ -293,7 +292,7 @@ func TestClientFlush(t *testing.T) { case transport.AppProductChange: assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) assert.Len(t, p.Products, 1) - assert.True(t, p.Products[types.Namespace("test-product")].Enabled) + assert.True(t, p.Products[Namespace("test-product")].Enabled) case transport.AppIntegrationChange: assert.Equal(t, transport.RequestTypeAppIntegrationsChange, payload.RequestType) assert.Len(t, p.Integrations, 1) @@ -332,14 +331,14 @@ func TestClientFlush(t *testing.T) { payload := payloads[0] require.IsType(t, transport.AppStarted{}, payload) appStart := payload.(transport.AppStarted) - assert.Equal(t, appStart.Products[types.Namespace("test-product")].Enabled, true) + assert.Equal(t, appStart.Products[Namespace("test-product")].Enabled, true) }, }, { name: "app-started-with-configuration", when: func(c *client) { c.appStart() - c.AddAppConfig("key", "value", types.OriginDefault) + c.AddAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads 
[]transport.Payload) { payload := payloads[0] @@ -649,6 +648,42 @@ func TestClientFlush(t *testing.T) { assert.Equal(t, logs.Logs[2].Count, uint32(1)) }, }, + { + name: "simple-count", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, metrics.Series[0].Type, transport.CountMetric) + assert.Equal(t, metrics.Series[0].Namespace, NamespaceTracers) + assert.Equal(t, metrics.Series[0].Metric, "init_time") + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, metrics.Series[0].Points[0][1], 1.0) + }, + }, + { + name: "simple-gauge", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, metrics.Series[0].Type, transport.GaugeMetric) + assert.Equal(t, metrics.Series[0].Namespace, NamespaceTracers) + assert.Equal(t, metrics.Series[0].Metric, "init_time") + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, metrics.Series[0].Points[0][1], 1.0) + }, + }, } { t.Run(test.name, func(t *testing.T) { t.Parallel() diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index dbf88e5393..1939bf055c 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -9,7 +9,6 @@ import ( "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) type configuration struct { @@ -18,7 +17,7 @@ type configuration struct { seqID uint64 } -func (c *configuration) Add(key string, value any, origin types.Origin) { +func (c *configuration) Add(key string, value any, origin Origin) { c.mu.Lock() defer c.mu.Unlock() diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 2a4d153788..edad33e9b2 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -12,7 +12,6 @@ import ( globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) var ( @@ -82,7 +81,7 @@ func Disabled() bool { // Count creates a new metric handle for the given parameters that can be used to submit values. // Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. 
-func Count(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func Count(namespace Namespace, name string, tags map[string]string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Count(namespace, name, tags) }) @@ -91,7 +90,7 @@ func Count(namespace types.Namespace, name string, tags map[string]string) Metri // Rate creates a new metric handle for the given parameters that can be used to submit values. // Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Rate(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func Rate(namespace Namespace, name string, tags map[string]string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Rate(namespace, name, tags) }) @@ -100,7 +99,7 @@ func Rate(namespace types.Namespace, name string, tags map[string]string) Metric // Gauge creates a new metric handle for the given parameters that can be used to submit values. // Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Gauge(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Gauge(namespace, name, tags) }) @@ -109,7 +108,7 @@ func Gauge(namespace types.Namespace, name string, tags map[string]string) Metri // Distribution creates a new metric handle for the given parameters that can be used to submit values. // Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Distribution(namespace types.Namespace, name string, tags map[string]string) MetricHandle { +func Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Distribution(namespace, name, tags) }) @@ -142,7 +141,7 @@ func Log(level LogLevel, text string, options ...LogOption) { // ProductStarted declares a product to have started at the customer’s request. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func ProductStarted(product types.Namespace) { +func ProductStarted(product Namespace) { globalClientCall(func(client Client) { client.ProductStarted(product) }) @@ -150,7 +149,7 @@ func ProductStarted(product types.Namespace) { // ProductStopped declares a product to have being stopped by the customer. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func ProductStopped(product types.Namespace) { +func ProductStopped(product Namespace) { globalClientCall(func(client Client) { client.ProductStopped(product) }) @@ -158,7 +157,7 @@ func ProductStopped(product types.Namespace) { // ProductStartError declares that a product could not start because of the following error. If telemetry is disabled, it will do nothing. 
// If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func ProductStartError(product types.Namespace, err error) { +func ProductStartError(product Namespace, err error) { globalClientCall(func(client Client) { client.ProductStartError(product, err) }) @@ -167,7 +166,7 @@ func ProductStartError(product types.Namespace, err error) { // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func AddAppConfig(key string, value any, origin types.Origin) { +func AddAppConfig(key string, value any, origin Origin) { globalClientCall(func(client Client) { client.AddAppConfig(key, value, origin) }) @@ -176,7 +175,7 @@ func AddAppConfig(key string, value any, origin types.Origin) { // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func AddBulkAppConfig(kvs map[string]any, origin types.Origin) { +func AddBulkAppConfig(kvs map[string]any, origin Origin) { globalClientCall(func(client Client) { client.AddBulkAppConfig(kvs, origin) }) diff --git a/internal/newtelemetry/internal/knownmetrics/common_metrics.json b/internal/newtelemetry/internal/knownmetrics/common_metrics.json index 56568f15e9..f8d2dbfcea 100644 --- a/internal/newtelemetry/internal/knownmetrics/common_metrics.json +++ b/internal/newtelemetry/internal/knownmetrics/common_metrics.json @@ -125,6 +125,8 @@ "server.memory_usage", "server.submitted_payloads", "span.start", + "span_created", + "span_finished", "span_pointer_calculation", "span_pointer_calculation.issue", "spans_created", @@ -163,6 +165,7 @@ "trace_partial_flush.spans_remaining", "trace_segments_closed", "trace_segments_created", + "tracer_init_time", "waf.config_errors", "waf.duration", "waf.duration_ext", diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index 419c66f009..031cddecdc 100644 --- a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -88,8 +88,13 @@ func getCommonMetricNames(input map[string]any) []string { } metrics := value.(map[string]any) - for metricKey := range metrics { + for metricKey, value := range metrics { names = append(names, metricKey) + if aliases, ok := value.(map[string]any)["aliases"]; ok { + for _, alias := range aliases.([]any) { + names = append(names, alias.(string)) + } + } } } return names diff --git a/internal/newtelemetry/internal/knownmetrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go index fbd3216785..858c1918ac 100644 --- a/internal/newtelemetry/internal/knownmetrics/known_metrics.go +++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2025 Datadog, Inc. 
-//go:generate go run gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/metrics/generator +//go:generate go run gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics/generator package knownmetrics diff --git a/internal/newtelemetry/internal/transport/app_product_change.go b/internal/newtelemetry/internal/transport/app_product_change.go index f52893a96a..dcbf2ec674 100644 --- a/internal/newtelemetry/internal/transport/app_product_change.go +++ b/internal/newtelemetry/internal/transport/app_product_change.go @@ -5,15 +5,11 @@ package transport -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" -) - type AppProductChange struct { Products Products `json:"products"` } -type Products map[types.Namespace]Product +type Products map[Namespace]Product func (AppProductChange) RequestType() RequestType { return RequestTypeAppProductChange diff --git a/internal/newtelemetry/internal/transport/conf_key_value.go b/internal/newtelemetry/internal/transport/conf_key_value.go index deab25d2f3..70778c7dbd 100644 --- a/internal/newtelemetry/internal/transport/conf_key_value.go +++ b/internal/newtelemetry/internal/transport/conf_key_value.go @@ -5,16 +5,12 @@ package transport -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" -) - // ConfKeyValue is a library-specific configuration value type ConfKeyValue struct { - Name string `json:"name"` - Value any `json:"value"` // can be any type of integer, float, string, or boolean - Origin types.Origin `json:"origin"` - Error Error `json:"error,omitempty"` + Name string `json:"name"` + Value any `json:"value"` // can be any type of integer, float, string, or boolean + Origin Origin `json:"origin"` + Error Error `json:"error,omitempty"` // SeqID is used to track the total number of configuration key value pairs applied across the tracer SeqID uint64 `json:"seq_id,omitempty"` diff --git a/internal/newtelemetry/internal/transport/distributions.go b/internal/newtelemetry/internal/transport/distributions.go index 2cac29b4d6..fcd11a86db 100644 --- a/internal/newtelemetry/internal/transport/distributions.go +++ b/internal/newtelemetry/internal/transport/distributions.go @@ -6,7 +6,7 @@ package transport import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" ) // All objects in this file are used to define the payload of the requests sent @@ -14,7 +14,7 @@ import ( // https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas type Distributions struct { - Namespace types.Namespace `json:"namespace"` + Namespace Namespace `json:"namespace"` Series []DistributionSeries `json:"series"` SkipAllowlist bool `json:"skip_allowlist,omitempty"` } diff --git a/internal/newtelemetry/internal/transport/generate-metrics.go b/internal/newtelemetry/internal/transport/generate-metrics.go index f6e85aec8e..1c0686a8be 100644 --- a/internal/newtelemetry/internal/transport/generate-metrics.go +++ b/internal/newtelemetry/internal/transport/generate-metrics.go @@ -5,18 +5,14 @@ package transport -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" -) - // All objects in this file are used to define the payload of the requests sent // to the telemetry API. 
// https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas type GenerateMetrics struct { - Namespace types.Namespace `json:"namespace,omitempty"` - Series []MetricData `json:"series"` - SkipAllowlist bool `json:"skip_allowlist,omitempty"` + Namespace Namespace `json:"namespace,omitempty"` + Series []MetricData `json:"series"` + SkipAllowlist bool `json:"skip_allowlist,omitempty"` } func (GenerateMetrics) RequestType() RequestType { @@ -51,6 +47,6 @@ type MetricData struct { // NOTE: If this field isn't present in the request, the API assumes // the metric is common. So we can't "omitempty" even though the // field is technically optional. - Common bool `json:"common"` - Namespace types.Namespace `json:"namespace,omitempty"` + Common bool `json:"common"` + Namespace Namespace `json:"namespace,omitempty"` } diff --git a/internal/newtelemetry/types/namespace.go b/internal/newtelemetry/internal/transport/namespace.go similarity index 74% rename from internal/newtelemetry/types/namespace.go rename to internal/newtelemetry/internal/transport/namespace.go index c86453ecea..9c4dfe1f37 100644 --- a/internal/newtelemetry/types/namespace.go +++ b/internal/newtelemetry/internal/transport/namespace.go @@ -1,12 +1,10 @@ // Unless explicitly stated otherwise all files in this repository are licensed // under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. +// Copyright 2025 Datadog, Inc. -package types +package transport -// Namespace describes an APM product to distinguish telemetry coming from -// different products used by the same application type Namespace string const ( diff --git a/internal/newtelemetry/internal/transport/origin.go b/internal/newtelemetry/internal/transport/origin.go new file mode 100644 index 0000000000..b12ee41d72 --- /dev/null +++ b/internal/newtelemetry/internal/transport/origin.go @@ -0,0 +1,17 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package transport + +// Origin describes the source of a configuration change +type Origin string + +const ( + OriginDefault Origin = "default" + OriginCode = "code" + OriginDDConfig = "dd_config" + OriginEnvVar = "env_var" + OriginRemoteConfig = "remote_config" +) diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 34b0f64c8d..d5079468df 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -11,14 +11,14 @@ import ( "go.uber.org/atomic" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) type metricKey struct { - namespace types.Namespace + namespace Namespace kind transport.MetricType name string tags string @@ -30,23 +30,35 @@ type metrics struct { } // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. 
-func (m *metrics) LoadOrStore(namespace types.Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { +func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { + if !knownmetrics.IsKnownMetricName(name) { + log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ + "The metric will still be sent.", name) + } + compiledTags := "" for k, v := range tags { compiledTags += k + ":" + v + "," } - key := metricKey{namespace: namespace, kind: kind, name: name, tags: strings.TrimSuffix(compiledTags, ",")} - handle, _ := m.store.LoadOrStore(key, newMetric(key)) - return handle -} + var ( + key = metricKey{namespace: namespace, kind: kind, name: name, tags: strings.TrimSuffix(compiledTags, ",")} + handle MetricHandle + loaded bool + ) -func newMetric(key metricKey) MetricHandle { - switch key.kind { + switch kind { case transport.CountMetric: - return &count{key: key} - default: - panic("unsupported metric type: " + key.kind) + handle, _ = m.store.LoadOrStore(key, &count{metric: metric{key: key}}) + case transport.GaugeMetric: + handle, _ = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) + case transport.RateMetric: + handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}}) + if !loaded { + handle.(*rate).intervalStart = time.Now() + } } + + return handle } func (m *metrics) Payload() transport.Payload { @@ -57,36 +69,87 @@ func (m *metrics) Payload() transport.Payload { } return true }) + + if len(series) == 0 { + return nil + } + return transport.GenerateMetrics{Series: series, SkipAllowlist: m.skipAllowlist} } -type count struct { - key metricKey - submit atomic.Bool - value atomic.Float64 - timestamp atomic.Int64 +type metric struct { + key metricKey + + // Values set during Submit() + newSubmit atomic.Bool + value atomic.Float64 + submitTime atomic.Int64 } -func (c *count) Submit(value float64) { - // There is kind-of a race condition here, but it's not a big deal, as the value and the timestamp will be sufficiently close together - c.submit.Store(true) - c.value.Add(value) - c.timestamp.Store(time.Now().Unix()) +func (c *metric) submit() { + c.newSubmit.Store(true) + c.submitTime.Store(time.Now().Unix()) } -func (c *count) payload() transport.MetricData { - if submit := c.submit.Swap(false); !submit { +func (c *metric) payload() transport.MetricData { + if submit := c.newSubmit.Swap(false); !submit { return transport.MetricData{} } - return transport.MetricData{ + var tags []string + if c.key.tags != "" { + tags = strings.Split(c.key.tags, ",") + } + + data := transport.MetricData{ Metric: c.key.name, Namespace: c.key.namespace, - Tags: strings.Split(c.key.tags, ","), + Tags: tags, Type: c.key.kind, Common: knownmetrics.IsCommonMetricName(c.key.name), Points: [][2]any{ - {c.timestamp.Load(), c.value.Load()}, + {c.submitTime.Load(), c.value.Load()}, }, } + + return data +} + +type count struct { + metric +} + +func (c *count) Submit(value float64) { + c.submit() + c.value.Add(value) +} + +type gauge struct { + metric +} + +func (c *gauge) Submit(value float64) { + c.submit() + c.value.Store(value) +} + +type rate struct { + metric + intervalStart time.Time +} + +func (c *rate) Submit(value float64) { + c.submit() + c.value.Add(value) +} + +func (c *rate) payload() transport.MetricData { + payload := c.metric.payload() + if payload.Metric == "" { + return payload + } + + 
payload.Interval = int64(time.Since(c.intervalStart).Seconds()) + payload.Points[0][1] = c.value.Load() / float64(payload.Interval) + return payload } diff --git a/internal/newtelemetry/product.go b/internal/newtelemetry/product.go index 9aa215f4d5..84c53e8242 100644 --- a/internal/newtelemetry/product.go +++ b/internal/newtelemetry/product.go @@ -9,19 +9,18 @@ import ( "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/types" ) type products struct { mu sync.Mutex - products map[types.Namespace]transport.Product + products map[Namespace]transport.Product } -func (p *products) Add(namespace types.Namespace, enabled bool, err error) { +func (p *products) Add(namespace Namespace, enabled bool, err error) { p.mu.Lock() defer p.mu.Unlock() if p.products == nil { - p.products = make(map[types.Namespace]transport.Product) + p.products = make(map[Namespace]transport.Product) } product := transport.Product{ diff --git a/internal/newtelemetry/types/origin.go b/internal/newtelemetry/types/origin.go deleted file mode 100644 index 9d45a10ab9..0000000000 --- a/internal/newtelemetry/types/origin.go +++ /dev/null @@ -1,46 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2024 Datadog, Inc. - -package types - -import ( - "bytes" - "fmt" -) - -type Origin int - -const ( - OriginDefault Origin = iota - OriginCode - OriginDDConfig - OriginEnvVar - OriginRemoteConfig -) - -func (o Origin) String() string { - switch o { - case OriginDefault: - return "default" - case OriginCode: - return "code" - case OriginDDConfig: - return "dd_config" - case OriginEnvVar: - return "env_var" - case OriginRemoteConfig: - return "remote_config" - default: - return fmt.Sprintf("unknown origin %d", o) - } -} - -func (o Origin) MarshalJSON() ([]byte, error) { - var b bytes.Buffer - b.WriteString(`"`) - b.WriteString(o.String()) - b.WriteString(`"`) - return b.Bytes(), nil -} From 651e8f6822d569137fc1c3ccbd31bdd6a7339bea Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 29 Jan 2025 11:27:57 +0100 Subject: [PATCH 28/61] support for distributions metrics Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 2 - internal/newtelemetry/client.go | 25 +++- internal/newtelemetry/distributions.go | 113 ++++++++++++++++++ internal/newtelemetry/internal/recorder.go | 1 + .../internal/transport/distributions.go | 8 +- internal/newtelemetry/metrics.go | 18 +-- 6 files changed, 147 insertions(+), 20 deletions(-) create mode 100644 internal/newtelemetry/distributions.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index fdc3c57e6e..5d7616c277 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -44,8 +44,6 @@ const ( // ``` type MetricHandle interface { Submit(value float64) - - payload() transport.MetricData } // Integration is an integration that is configured to be traced. 
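The client.go change below returns a noopMetricHandle whenever MetricsEnabled is false, so every submission site can call Submit unconditionally. A minimal self-contained sketch of that no-op handle pattern, using stand-in names (handle, noopHandle, realHandle and count are placeholders for this note, not the patch's MetricHandle implementations):

// Sketch only: stand-in types showing why handing out a no-op handle keeps
// call sites free of "is telemetry enabled" checks.
package main

import "fmt"

type handle interface{ Submit(value float64) }

// noopHandle is what a disabled client hands out; Submit does nothing.
type noopHandle struct{}

func (noopHandle) Submit(float64) {}

// realHandle accumulates submitted values, the way a count metric would.
type realHandle struct{ total float64 }

func (h *realHandle) Submit(v float64) { h.total += v }

// count mirrors the guard added in client.go: disabled means no-op.
func count(enabled bool, real *realHandle) handle {
	if !enabled {
		return noopHandle{}
	}
	return real
}

func main() {
	real := &realHandle{}
	for _, enabled := range []bool{false, true} {
		count(enabled, real).Submit(1) // identical call site either way
	}
	fmt.Println(real.total) // prints: 1 (only the enabled submission counted)
}

Returning a value that satisfies the interface, rather than nil, is what lets the public API promise that Count, Gauge, Rate and Distribution always hand back something safe to use.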
diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index a0d99b242d..c8d04f9805 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -55,6 +55,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + // TODO: tweak this value once we get real telemetry data from the telemetry client payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), dependencies: dependencies{ @@ -77,7 +78,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client } if config.MetricsEnabled { - client.dataSources = append(client.dataSources, &client.metrics) + client.dataSources = append(client.dataSources, &client.metrics, &client.distributions) } client.flushTicker = internal.NewTicker(func() { @@ -99,6 +100,7 @@ type client struct { dependencies dependencies logger logger metrics metrics + distributions distributions dataSources []interface { Payload() transport.Payload } @@ -125,21 +127,36 @@ func (c *client) MarkIntegrationAsLoaded(integration Integration) { c.integrations.Add(integration) } +type noopMetricHandle struct{} + +func (noopMetricHandle) Submit(_ float64) {} + func (c *client) Count(namespace Namespace, name string, tags map[string]string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } return c.metrics.LoadOrStore(namespace, transport.CountMetric, name, tags) } func (c *client) Rate(namespace Namespace, name string, tags map[string]string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } return c.metrics.LoadOrStore(namespace, transport.RateMetric, name, tags) } func (c *client) Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } return c.metrics.LoadOrStore(namespace, transport.GaugeMetric, name, tags) } -func (c *client) Distribution(_ Namespace, _ string, _ map[string]string) MetricHandle { - //TODO implement me - panic("implement me") +func (c *client) Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle { + if !c.clientConfig.MetricsEnabled { + return noopMetricHandle{} + } + return c.distributions.LoadOrStore(namespace, name, tags) } func (c *client) ProductStarted(product Namespace) { diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go new file mode 100644 index 0000000000..59779a28e4 --- /dev/null +++ b/internal/newtelemetry/distributions.go @@ -0,0 +1,113 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package newtelemetry + +import ( + "strings" + "sync" + "sync/atomic" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +type distributionKey struct { + namespace Namespace + name string + tags string +} + +type distributions struct { + store internal.TypedSyncMap[distributionKey, *distribution] + skipAllowlist bool // Debugging feature to skip the allowlist of known metrics +} + +// LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. +func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[string]string) MetricHandle { + if !knownmetrics.IsKnownMetricName(name) { + log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ + "The metric will still be sent.", name) + } + + compiledTags := "" + for k, v := range tags { + compiledTags += k + ":" + v + "," + } + + key := distributionKey{namespace: namespace, name: name, tags: strings.TrimSuffix(compiledTags, ",")} + + // Max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. + // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. + // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. + // Which will bring our max throughput to 1100 points per second or about 750µs per request. 
+ // TODO: tweak this value once we get telemetry data from the telemetry client + handle, _ := d.store.LoadOrStore(key, &distribution{values: internal.NewRingQueue[float64](1<<8, 1<<14)}) + + return handle +} + +func (d *distributions) Payload() transport.Payload { + series := make([]transport.DistributionSeries, 0, d.store.Len()) + d.store.Range(func(_ distributionKey, handle *distribution) bool { + if payload := handle.payload(); payload.Metric != "" { + series = append(series, payload) + } + return true + }) + + if len(series) == 0 { + return nil + } + + return transport.Distributions{Series: series, SkipAllowlist: d.skipAllowlist} +} + +type distribution struct { + key distributionKey + + newSubmit atomic.Bool + values *internal.RingQueue[float64] +} + +var logLossOnce sync.Once + +func (d *distribution) Submit(value float64) { + d.newSubmit.Store(true) + if !d.values.Enqueue(value) { + logLossOnce.Do(func() { + log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) + }) + } +} + +func (d *distribution) payload() transport.DistributionSeries { + if submit := d.newSubmit.Swap(false); !submit { + return transport.DistributionSeries{} + } + + var tags []string + if d.key.tags != "" { + tags = strings.Split(d.key.tags, ",") + } + + points := d.values.GetBuffer() + defer d.values.ReleaseBuffer(points) + + copyPoints := make([]float64, len(points)) + copy(copyPoints, points) + + data := transport.DistributionSeries{ + Metric: d.key.name, + Namespace: d.key.namespace, + Tags: tags, + Common: knownmetrics.IsCommonMetricName(d.key.name), + Points: copyPoints, + } + + return data +} diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index 76923377e1..edfb843524 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -18,6 +18,7 @@ type Recorder[T any] struct { // NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions. func NewRecorder[T any]() Recorder[T] { return Recorder[T]{ + // TODO: tweak this value once we get telemetry data from the telemetry client queue: NewRingQueue[func(T)](16, 512), } } diff --git a/internal/newtelemetry/internal/transport/distributions.go b/internal/newtelemetry/internal/transport/distributions.go index fcd11a86db..1dda074488 100644 --- a/internal/newtelemetry/internal/transport/distributions.go +++ b/internal/newtelemetry/internal/transport/distributions.go @@ -5,10 +5,6 @@ package transport -import ( - "github.com/elastic/go-elasticsearch/v8/typedapi/types" -) - // All objects in this file are used to define the payload of the requests sent // to the telemetry API. // https://github.com/DataDog/instrumentation-telemetry-api-docs/tree/dad49961203d74ec8236b68ce4b54bbb7ed8716f/GeneratedDocumentation/ApiDocs/v2/SchemaDocumentation/Schemas @@ -35,6 +31,6 @@ type DistributionSeries struct { // NOTE: If this field isn't present in the request, the API assumes // the metric is common. So we can't "omitempty" even though the // field is technically optional. 
- Common bool `json:"common,omitempty"` - Namespace types.Namespace `json:"namespace,omitempty"` + Common bool `json:"common,omitempty"` + Namespace Namespace `json:"namespace,omitempty"` } diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index d5079468df..fe2142c0d5 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -24,8 +24,14 @@ type metricKey struct { tags string } +// metricsHandle is the internal equivalent of MetricHandle for Count/Rate/Gauge metrics that are sent via the payload [transport.GenerateMetrics]. +type metricHandle interface { + MetricHandle + payload() transport.MetricData +} + type metrics struct { - store internal.TypedSyncMap[metricKey, MetricHandle] + store internal.TypedSyncMap[metricKey, metricHandle] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics } @@ -40,22 +46,18 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na for k, v := range tags { compiledTags += k + ":" + v + "," } + var ( key = metricKey{namespace: namespace, kind: kind, name: name, tags: strings.TrimSuffix(compiledTags, ",")} handle MetricHandle - loaded bool ) - switch kind { case transport.CountMetric: handle, _ = m.store.LoadOrStore(key, &count{metric: metric{key: key}}) case transport.GaugeMetric: handle, _ = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) case transport.RateMetric: - handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}}) - if !loaded { - handle.(*rate).intervalStart = time.Now() - } + handle, _ = m.store.LoadOrStore(key, &rate{metric: metric{key: key}, intervalStart: time.Now()}) } return handle @@ -63,7 +65,7 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na func (m *metrics) Payload() transport.Payload { series := make([]transport.MetricData, 0, m.store.Len()) - m.store.Range(func(_ metricKey, handle MetricHandle) bool { + m.store.Range(func(_ metricKey, handle metricHandle) bool { if payload := handle.payload(); payload.Metric != "" { series = append(series, payload) } From 44eb96000f24d2ca4a3e1b139c3e7b3cee1ad8c4 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 29 Jan 2025 16:20:58 +0100 Subject: [PATCH 29/61] add tests and fix issues with distributions Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 3 + internal/newtelemetry/client_test.go | 535 +++++++++++++++++-- internal/newtelemetry/distributions.go | 10 +- internal/newtelemetry/internal/ringbuffer.go | 20 + 4 files changed, 518 insertions(+), 50 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index c8d04f9805..56b35cb610 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -64,6 +64,9 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client metrics: metrics{ skipAllowlist: config.Debug, }, + distributions: distributions{ + skipAllowlist: config.Debug, + }, } client.dataSources = append(client.dataSources, diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 88b646ed77..14e4587bf2 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -476,8 +476,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelDebug) - assert.Equal(t, logs.Logs[0].Message, "test") + 
assert.Equal(t, transport.LogLevelDebug, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) }, }, { @@ -490,8 +490,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelWarn) - assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, transport.LogLevelWarn, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) }, }, { @@ -504,8 +504,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) }, }, { @@ -520,9 +520,9 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") - assert.Equal(t, logs.Logs[0].Count, uint32(3)) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, uint32(3), logs.Logs[0].Count) }, }, { @@ -535,9 +535,9 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") - assert.Equal(t, logs.Logs[0].Tags, "key:value") + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, "key:value", logs.Logs[0].Tags) }, }, { @@ -550,8 +550,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) tags := strings.Split(logs.Logs[0].Tags, ",") assert.Contains(t, tags, "key:value") assert.Contains(t, tags, "key2:value2") @@ -573,14 +573,14 @@ func TestClientFlush(t *testing.T) { return strings.Compare(i.Tags, j.Tags) }) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") - assert.Equal(t, logs.Logs[0].Count, uint32(1)) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, uint32(1), logs.Logs[0].Count) assert.Empty(t, logs.Logs[0].Tags) - assert.Equal(t, logs.Logs[1].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[1].Message, "test") - assert.Equal(t, logs.Logs[1].Count, uint32(1)) + assert.Equal(t, transport.LogLevelError, logs.Logs[1].Level) + assert.Equal(t, "test", logs.Logs[1].Message) + assert.Equal(t, uint32(1), logs.Logs[1].Count) tags := strings.Split(logs.Logs[1].Tags, ",") assert.Contains(t, tags, "key:value") assert.Contains(t, tags, "key2:value2") @@ -596,8 +596,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, 
logs.Logs[0].Message, "test") + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") }, }, @@ -611,8 +611,8 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.Logs{}, payload) logs := payload.(transport.Logs) require.Len(t, logs.Logs, 1) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[0].Message, "test") + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) assert.Contains(t, logs.Logs[0].StackTrace, "internal/newtelemetry/client_test.go") tags := strings.Split(logs.Logs[0].Tags, ",") assert.Contains(t, tags, "key:value") @@ -637,15 +637,15 @@ func TestClientFlush(t *testing.T) { return strings.Compare(string(i.Level), string(j.Level)) }) - assert.Equal(t, logs.Logs[0].Level, transport.LogLevelDebug) - assert.Equal(t, logs.Logs[0].Message, "test") - assert.Equal(t, logs.Logs[0].Count, uint32(1)) - assert.Equal(t, logs.Logs[1].Level, transport.LogLevelError) - assert.Equal(t, logs.Logs[1].Message, "test") - assert.Equal(t, logs.Logs[1].Count, uint32(1)) - assert.Equal(t, logs.Logs[2].Level, transport.LogLevelWarn) - assert.Equal(t, logs.Logs[2].Message, "test") - assert.Equal(t, logs.Logs[2].Count, uint32(1)) + assert.Equal(t, transport.LogLevelDebug, logs.Logs[0].Level) + assert.Equal(t, "test", logs.Logs[0].Message) + assert.Equal(t, uint32(1), logs.Logs[0].Count) + assert.Equal(t, transport.LogLevelError, logs.Logs[1].Level) + assert.Equal(t, "test", logs.Logs[1].Message) + assert.Equal(t, uint32(1), logs.Logs[1].Count) + assert.Equal(t, transport.LogLevelWarn, logs.Logs[2].Level) + assert.Equal(t, "test", logs.Logs[2].Message) + assert.Equal(t, uint32(1), logs.Logs[2].Count) }, }, { @@ -658,12 +658,89 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.GenerateMetrics{}, payload) metrics := payload.(transport.GenerateMetrics) require.Len(t, metrics.Series, 1) - assert.Equal(t, metrics.Series[0].Type, transport.CountMetric) - assert.Equal(t, metrics.Series[0].Namespace, NamespaceTracers) - assert.Equal(t, metrics.Series[0].Metric, "init_time") + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) assert.Empty(t, metrics.Series[0].Tags) assert.NotZero(t, metrics.Series[0].Points[0][0]) - assert.Equal(t, metrics.Series[0].Points[0][1], 1.0) + assert.Equal(t, 1.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: "count-multiple-call-same-handle", + when: func(c *client) { + handle1 := c.Count(NamespaceTracers, "init_time", nil) + handle2 := c.Count(NamespaceTracers, "init_time", nil) + + handle2.Submit(1) + handle1.Submit(1) + handle1.Submit(3) + handle2.Submit(2) + handle2.Submit(10) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, 17.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: 
"multiple-count-by-name", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time_1", nil).Submit(1) + c.Count(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64))), metrics.Series[0].Metric) + + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.CountMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64))), metrics.Series[1].Metric) + assert.Empty(t, metrics.Series[1].Tags) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "multiple-count-by-tags", + when: func(c *client) { + c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) + c.Count(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.CountMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + + assert.Contains(t, metrics.Series[0].Tags, "test:"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.CountMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time", metrics.Series[1].Metric) + assert.Contains(t, metrics.Series[1].Tags, "test:"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[1].Points[0][0]) }, }, { @@ -676,12 +753,157 @@ func TestClientFlush(t *testing.T) { require.IsType(t, transport.GenerateMetrics{}, payload) metrics := payload.(transport.GenerateMetrics) require.Len(t, metrics.Series, 1) - assert.Equal(t, metrics.Series[0].Type, transport.GaugeMetric) - assert.Equal(t, metrics.Series[0].Namespace, NamespaceTracers) - assert.Equal(t, metrics.Series[0].Metric, "init_time") + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + assert.Equal(t, 1.0, metrics.Series[0].Points[0][1]) + }, + }, + { + name: "multiple-gauge-by-name", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time_1", nil).Submit(1) + c.Gauge(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, 
metrics.Series[0].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64))), metrics.Series[0].Metric) + + assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time_"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64))), metrics.Series[1].Metric) + assert.Empty(t, metrics.Series[1].Tags) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "multiple-gauge-by-tags", + when: func(c *client) { + c.Gauge(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) + c.Gauge(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 2) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) + + assert.Contains(t, metrics.Series[0].Tags, "test:"+strconv.Itoa(int(metrics.Series[0].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[0].Points[0][0]) + + assert.Equal(t, transport.GaugeMetric, metrics.Series[1].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[1].Namespace) + assert.Equal(t, "init_time", metrics.Series[1].Metric) + assert.Contains(t, metrics.Series[1].Tags, "test:"+strconv.Itoa(int(metrics.Series[1].Points[0][1].(float64)))) + assert.NotZero(t, metrics.Series[1].Points[0][0]) + }, + }, + { + name: "simple-rate", + when: func(c *client) { + handle := c.Rate(NamespaceTracers, "init_time", nil) + handle.Submit(1) + + rate := handle.(*rate) + rate.intervalStart = rate.intervalStart.Add(-time.Second) // So the rate is not +Infinity because the interval is zero + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.GenerateMetrics{}, payload) + metrics := payload.(transport.GenerateMetrics) + require.Len(t, metrics.Series, 1) + assert.Equal(t, transport.RateMetric, metrics.Series[0].Type) + assert.Equal(t, NamespaceTracers, metrics.Series[0].Namespace) + assert.Equal(t, "init_time", metrics.Series[0].Metric) assert.Empty(t, metrics.Series[0].Tags) + assert.NotZero(t, metrics.Series[0].Interval) assert.NotZero(t, metrics.Series[0].Points[0][0]) - assert.Equal(t, metrics.Series[0].Points[0][1], 1.0) + assert.LessOrEqual(t, metrics.Series[0].Points[0][1], 1.1) + }, + }, + { + name: "simple-distribution", + when: func(c *client) { + c.Distribution(NamespaceGeneral, "init_time", nil).Submit(1) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 1) + assert.Equal(t, NamespaceGeneral, distributions.Series[0].Namespace) + assert.Equal(t, "init_time", distributions.Series[0].Metric) + assert.Empty(t, distributions.Series[0].Tags) + require.Len(t, distributions.Series[0].Points, 1) + assert.Equal(t, 1.0, distributions.Series[0].Points[0]) + }, + }, + { + name: "multiple-distribution-by-name", + when: func(c *client) { + c.Distribution(NamespaceTracers, "init_time_1", nil).Submit(1) + 
c.Distribution(NamespaceTracers, "init_time_2", nil).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 2) + + assert.Equal(t, "init_time_"+strconv.Itoa(int(distributions.Series[0].Points[0])), distributions.Series[0].Metric) + assert.Equal(t, "init_time_"+strconv.Itoa(int(distributions.Series[1].Points[0])), distributions.Series[1].Metric) + }, + }, + { + name: "multiple-distribution-by-tags", + when: func(c *client) { + c.Distribution(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) + c.Distribution(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 2) + + assert.Contains(t, distributions.Series[0].Tags, "test:"+strconv.Itoa(int(distributions.Series[0].Points[0]))) + assert.Contains(t, distributions.Series[1].Tags, "test:"+strconv.Itoa(int(distributions.Series[1].Points[0]))) + }, + }, + { + name: "full-distribution", + when: func(c *client) { + handler := c.Distribution(NamespaceGeneral, "init_time", nil) + for i := 0; i < 1<<16; i++ { + handler.Submit(float64(i)) + } + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, payload, transport.Distributions{}) + distributions := payload.(transport.Distributions) + require.Len(t, distributions.Series, 1) + assert.Equal(t, NamespaceGeneral, distributions.Series[0].Namespace) + assert.Equal(t, "init_time", distributions.Series[0].Metric) + assert.Empty(t, distributions.Series[0].Tags) + + // Should not contain the first passed point + assert.NotContains(t, distributions.Series[0].Points, 0.0) }, }, } { @@ -709,6 +931,20 @@ func TestClientFlush(t *testing.T) { } } +func TestMetricsDisabled(t *testing.T) { + t.Setenv("DD_TELEMETRY_METRICS_ENABLED", "false") + + client, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"}) + require.NoError(t, err) + + defer client.Close() + + assert.NotNil(t, client.Gauge(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, client.Count(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, client.Rate(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, client.Distribution(NamespaceGeneral, "init_time", nil)) +} + type testRoundTripper struct { roundTrip func(*http.Request) (*http.Response, error) } @@ -865,6 +1101,7 @@ func BenchmarkLogs(b *testing.B) { } b.Run("simple", func(b *testing.B) { + b.ReportAllocs() c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) require.NoError(b, err) @@ -877,6 +1114,7 @@ func BenchmarkLogs(b *testing.B) { }) b.Run("with-tags", func(b *testing.B) { + b.ReportAllocs() c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) require.NoError(b, err) @@ -889,6 +1127,7 @@ func BenchmarkLogs(b *testing.B) { }) b.Run("with-stacktrace", func(b *testing.B) { + b.ReportAllocs() c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) require.NoError(b, err) @@ -902,6 +1141,7 @@ func BenchmarkLogs(b *testing.B) { } func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { + b.ReportAllocs() nbSameLogs := 10 nbDifferentLogs := 100 nbGoroutines := 25 @@ 
-953,5 +1193,216 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { wg.Wait() } - b.Log("Called (*client).Log ", nbGoroutines*nbDifferentLogs*nbSameLogs, " times") + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentLogs*nbSameLogs*b.N)), "ns/log") +} + +func BenchmarkMetrics(b *testing.B) { + b.ReportAllocs() + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + AgentURL: "http://localhost:8126", + } + + b.Run("count+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Count(NamespaceTracers, "init_time", nil).Submit(1) + } + }) + + b.Run("count+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Count(NamespaceTracers, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("gauge+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Gauge(NamespaceTracers, "init_time", nil).Submit(1) + } + }) + + b.Run("gauge+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Gauge(NamespaceTracers, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("rate+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Rate(NamespaceTracers, "init_time", nil).Submit(1) + } + }) + + b.Run("rate+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Rate(NamespaceTracers, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) + + b.Run("distribution+get-handle", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Distribution(NamespaceTracers, "init_time", nil).Submit(1) + } + }) + + b.Run("distribution+handle-reused", func(b *testing.B) { + b.ReportAllocs() + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + handle := c.Distribution(NamespaceTracers, "init_time", nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + handle.Submit(1) + } + }) +} + +func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { + b.ReportAllocs() + nbSameMetric := 10 + nbDifferentMetrics := 100 + nbGoroutines := 25 + + clientConfig := ClientConfig{ + HeartbeatInterval: time.Hour, + ExtendedHeartbeatInterval: time.Hour, + FlushIntervalRange: struct { + Min time.Duration + Max time.Duration + }{Min: time.Second, Max: time.Second}, + AgentURL: "http://localhost:8126", + + // Empty transport to avoid sending data to the agent + HTTPClient: &http.Client{ + Timeout: 5 * 
time.Second, + Transport: &testRoundTripper{ + roundTrip: func(_ *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + }, + } + + b.Run("get-handle", func(b *testing.B) { + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentMetrics; j++ { + for k := 0; k < nbSameMetric; k++ { + c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) + } + } + }() + } + + wg.Wait() + } + + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentMetrics*nbSameMetric*b.N)), "ns/point") + }) + + b.Run("handle-reused", func(b *testing.B) { + c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) + require.NoError(b, err) + + defer c.Close() + + b.ResetTimer() + + for x := 0; x < b.N; x++ { + var wg sync.WaitGroup + + handle := c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}) + for i := 0; i < nbGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < nbDifferentMetrics; j++ { + + for k := 0; k < nbSameMetric; k++ { + handle.Submit(1) + } + } + }() + } + + wg.Wait() + } + + b.ReportMetric(float64(b.Elapsed().Nanoseconds()/int64(nbGoroutines*nbDifferentMetrics*nbSameMetric*b.N)), "ns/point") + }) + } diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 59779a28e4..1ac85a52ee 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -46,7 +46,7 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[s // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. // Which will bring our max throughput to 1100 points per second or about 750µs per request. // TODO: tweak this value once we get telemetry data from the telemetry client - handle, _ := d.store.LoadOrStore(key, &distribution{values: internal.NewRingQueue[float64](1<<8, 1<<14)}) + handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](1<<8, 1<<14)}) return handle } @@ -95,18 +95,12 @@ func (d *distribution) payload() transport.DistributionSeries { tags = strings.Split(d.key.tags, ",") } - points := d.values.GetBuffer() - defer d.values.ReleaseBuffer(points) - - copyPoints := make([]float64, len(points)) - copy(copyPoints, points) - data := transport.DistributionSeries{ Metric: d.key.name, Namespace: d.key.namespace, Tags: tags, Common: knownmetrics.IsCommonMetricName(d.key.name), - Points: copyPoints, + Points: d.values.Flush(), } return data diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 8c0d3800a8..3654c14fde 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -81,7 +81,10 @@ func (rb *RingQueue[T]) Dequeue() T { func (rb *RingQueue[T]) GetBuffer() []T { rb.mu.Lock() defer rb.mu.Unlock() + return rb.getBufferLocked() +} +func (rb *RingQueue[T]) getBufferLocked() []T { prevBuf := rb.buffer rb.buffer = rb.pool.Get().([]T) rb.head = 0 @@ -89,6 +92,23 @@ func (rb *RingQueue[T]) GetBuffer() []T { return prevBuf } +// Flush returns a copy of the buffer and resets it. 
+func (rb *RingQueue[T]) Flush() []T { + rb.mu.Lock() + head, tail := rb.head, rb.tail + buf := rb.getBufferLocked() + rb.mu.Unlock() + + defer rb.ReleaseBuffer(buf) + + copyBuf := make([]T, 0, len(buf)) + for i := head; i != tail; i = (i + 1) % len(buf) { + copyBuf = append(copyBuf, buf[i]) + } + + return copyBuf +} + // ReleaseBuffer returns the buffer to the pool. func (rb *RingQueue[T]) ReleaseBuffer(buf []T) { rb.pool.Put(buf[:cap(buf)]) // Make sure nobody reduced the length of the buffer From 05b010c83575052c05607c95411b382dde7846da Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 29 Jan 2025 17:41:53 +0100 Subject: [PATCH 30/61] reworked knownmetrics to include namespace and metric kind and added internal metrics support Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 14 +- internal/newtelemetry/client.go | 53 +- internal/newtelemetry/client_config.go | 84 +- internal/newtelemetry/client_test.go | 41 +- internal/newtelemetry/distributions.go | 13 +- internal/newtelemetry/globalclient.go | 26 +- .../internal/knownmetrics/common_metrics.json | 1050 ++++++++++++++--- .../knownmetrics/generator/generator.go | 40 +- .../internal/knownmetrics/golang_metrics.json | 6 +- .../internal/knownmetrics/known_metrics.go | 32 +- internal/newtelemetry/internal/recorder.go | 16 +- .../internal/transport/namespace.go | 15 +- internal/newtelemetry/internal/writer.go | 4 +- internal/newtelemetry/metrics.go | 4 +- 14 files changed, 1118 insertions(+), 280 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 5d7616c277..ca7159befc 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -16,12 +16,14 @@ import ( type Namespace = transport.Namespace const ( - NamespaceGeneral = transport.NamespaceGeneral - NamespaceTracers = transport.NamespaceTracers - NamespaceProfilers = transport.NamespaceProfilers - NamespaceAppSec = transport.NamespaceAppSec - NamespaceIAST = transport.NamespaceIAST - NamespaceTelemetry = transport.NamespaceTelemetry + NamespaceGeneral = transport.NamespaceGeneral + NamespaceTracers = transport.NamespaceTracers + NamespaceProfilers = transport.NamespaceProfilers + NamespaceAppSec = transport.NamespaceAppSec + NamespaceIAST = transport.NamespaceIAST + NamespaceCIVisibility = transport.NamespaceCIVisibility + NamespaceMLOps = transport.NamespaceMLOps + NamespaceRUM = transport.NamespaceRUM ) // Origin describes the source of a configuration change diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 56b35cb610..8c1b879982 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -7,6 +7,8 @@ package newtelemetry import ( "errors" + "os" + "strconv" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" @@ -53,10 +55,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client writer: writer, clientConfig: config, flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), - // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. - // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. 
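Stepping back to the RingQueue.Flush helper added above: it copies the buffered values out in arrival order and hands the old backing array back to the pool, which is what lets distribution.payload drop its manual GetBuffer/copy/ReleaseBuffer sequence. A usage sketch as it could sit in a test next to ringbuffer.go; only NewRingQueue, Enqueue and Flush are taken from this diff, and the expected output assumes Enqueue stores values in arrival order, which the Flush loop preserves:

func ExampleRingQueue_Flush() {
	// The bounds mirror the ones used for distributions in this patch: start at
	// 2^8 entries and grow up to 2^14 before Enqueue starts returning false.
	q := NewRingQueue[float64](1<<8, 1<<14)

	for i := 0; i < 3; i++ {
		if !q.Enqueue(float64(i)) {
			// Full queue: the caller (distribution.Submit) drops the value and logs once.
		}
	}

	fmt.Println(q.Flush()) // buffered values, oldest first; the queue is now empty
	// Output: [0 1 2]
}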
- // TODO: tweak this value once we get real telemetry data from the telemetry client - payloadQueue: internal.NewRingQueue[transport.Payload](4, 32), + payloadQueue: internal.NewRingQueue[transport.Payload](config.PayloadQueueSize.Min, config.PayloadQueueSize.Max), dependencies: dependencies{ DependencyLoader: config.DependencyLoader, @@ -86,7 +85,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client client.flushTicker = internal.NewTicker(func() { client.Flush() - }, config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + }, config.FlushInterval.Min, config.FlushInterval.Max) return client, nil } @@ -230,6 +229,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { } results, err := c.writer.Flush(payload) + c.computeFlushMetrics(results, err) if err != nil { // We stop flushing when we encounter a fatal error, put the payloads in the queue and return the error log.Error("error while flushing telemetry data: %v", err) @@ -264,6 +264,49 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return nbBytes, nil } +// computeFlushMetrics computes and submits the metrics for the flush operation using the output from the writer.Flush method. +// It will submit the number of requests, responses, errors, the number of bytes sent and the duration of the call that was successful. +func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, err error) { + if !c.clientConfig.InternalMetricsEnabled { + return + } + + indexToEndpoint := func(i int) string { + if i == 0 { + return "agent" + } + return "agentless" + } + + for i, result := range results { + c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", map[string]string{"endpoint": indexToEndpoint(i)}).Submit(1) + if result.StatusCode != 0 { + c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", map[string]string{"endpoint": indexToEndpoint(i), "status_code": strconv.Itoa(result.StatusCode)}).Submit(1) + } + + if result.Error != nil { + typ := "network" + if os.IsTimeout(result.Error) { + typ = "timeout" + } + var writerStatusCodeError *internal.WriterStatusCodeError + if errors.As(result.Error, &writerStatusCodeError) { + typ = "status_code" + } + c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", map[string]string{"endpoint": indexToEndpoint(i), "type": typ}).Submit(1) + } + } + + if err != nil { + return + } + + successfulCall := results[len(results)-1] + endpoint := indexToEndpoint(len(results) - 1) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.PayloadByteSize)) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) +} + func (c *client) appStart() { c.flushMapperMu.Lock() defer c.flushMapperMu.Unlock() diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index 7da0a5c2ee..bbee82d61a 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -17,6 +17,11 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" ) +type Range[T any] struct { + Min T + Max T +} + type ClientConfig struct { // DependencyLoader determines how dependency data is sent via telemetry. // If nil, the library should not send the app-dependencies-loaded event. 
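With the Range type in place, the tunable windows that the following hunks add to ClientConfig (FlushInterval, PayloadQueueSize, DistributionsSize) become explicit Min/Max pairs instead of anonymous structs. A hypothetical caller-side configuration; the field names come from this diff and the numbers are the documented defaults, not a recommendation:

cfg := ClientConfig{
	AgentURL: "http://localhost:8126",

	// The client starts flushing every 60s and tightens the interval down to
	// 15s under load; both bounds must stay at or under the 60s heartbeat.
	FlushInterval: Range[time.Duration]{Min: 15 * time.Second, Max: 60 * time.Second},

	// Bounded backlog of payloads kept across failed flushes.
	PayloadQueueSize: Range[int]{Min: 4, Max: 21},

	// Per-distribution point buffer, from 2^8 up to 2^14 float64 values.
	DistributionsSize: Range[int]{Min: 1 << 8, Max: 1 << 14},
}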
@@ -24,7 +29,7 @@ type ClientConfig struct { // This can be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED DependencyLoader func() (*debug.BuildInfo, bool) - // MetricsEnabled etermines whether metrics are sent via telemetry. + // MetricsEnabled determines whether metrics are sent via telemetry. // If false, libraries should not send the generate-metrics or distributions events. // This can be controlled via the env var DD_TELEMETRY_METRICS_ENABLED MetricsEnabled bool @@ -33,6 +38,9 @@ type ClientConfig struct { // This can be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED LogsEnabled bool + // InternalMetricsEnabled determines whether client stats metrics are sent via telemetry. + InternalMetricsEnabled bool + // AgentlessURL is the full URL to the agentless telemetry endpoint. (optional) // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string @@ -51,13 +59,23 @@ type ClientConfig struct { // ExtendedHeartbeatInterval is the interval at which to send an extended heartbeat payload, defaults to 24h. ExtendedHeartbeatInterval time.Duration - // FlushIntervalRange is the interval at which the client flushes the data. + // FlushInterval is the interval at which the client flushes the data. // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. - FlushIntervalRange struct { - Min time.Duration - Max time.Duration - } + FlushInterval Range[time.Duration] + + // PayloadQueueSize is the size of the payload queue. + // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. + // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + // Ideally both values should be power of 2 because of the way the ring queue is implemented as it's growing + PayloadQueueSize Range[int] + + // DistributionsSize is the size of the distribution queue. + // Default max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. + // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. + // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. + // Which will bring our max throughput to 1100 points per second or about 750µs per request. + DistributionsSize Range[int] // Debug enables debug mode for the telemetry clientt and sent it to the backend so it logs the request Debug bool @@ -71,7 +89,7 @@ type ClientConfig struct { EarlyFlushPayloadSize int } -const ( +var ( // agentlessURL is the endpoint used to send telemetry in an agentless environment. It is // also the default URL in case connecting to the agent URL fails. agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" @@ -94,6 +112,18 @@ const ( // maxPayloadSize is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. 
maxPayloadSize = 5 * 1024 * 1024 // 5MB + + // TODO: tweak this value once we get real telemetry data from the telemetry client + defaultPayloadQueueSize = Range[int]{ + Min: 4, + Max: 21, + } + + // TODO: tweak this value once we get telemetry data from the telemetry client + distributionsSize = Range[int]{ + Min: 1 << 8, + Max: 1 << 14, + } ) // clamp squeezes a value between a minimum and maximum value. @@ -106,12 +136,12 @@ func (config ClientConfig) validateConfig() error { return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval) } - if config.FlushIntervalRange.Min > 60*time.Second || config.FlushIntervalRange.Max > 60*time.Second { - return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + if config.FlushInterval.Min > 60*time.Second || config.FlushInterval.Max > 60*time.Second { + return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) } - if config.FlushIntervalRange.Min > config.FlushIntervalRange.Max { - return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushIntervalRange.Min, config.FlushIntervalRange.Max) + if config.FlushInterval.Min > config.FlushInterval.Max { + return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) } if config.EarlyFlushPayloadSize > maxPayloadSize || config.EarlyFlushPayloadSize <= 0 { @@ -137,16 +167,16 @@ func defaultConfig(config ClientConfig) ClientConfig { config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) } - if config.FlushIntervalRange.Min == 0 { - config.FlushIntervalRange.Min = defaultMinFlushInterval + if config.FlushInterval.Min == 0 { + config.FlushInterval.Min = defaultMinFlushInterval } else { - config.FlushIntervalRange.Min = clamp(config.FlushIntervalRange.Min, time.Microsecond, 60*time.Second) + config.FlushInterval.Min = clamp(config.FlushInterval.Min, time.Microsecond, 60*time.Second) } - if config.FlushIntervalRange.Max == 0 { - config.FlushIntervalRange.Max = defaultMaxFlushInterval + if config.FlushInterval.Max == 0 { + config.FlushInterval.Max = defaultMaxFlushInterval } else { - config.FlushIntervalRange.Max = clamp(config.FlushIntervalRange.Max, time.Microsecond, 60*time.Second) + config.FlushInterval.Max = clamp(config.FlushInterval.Max, time.Microsecond, 60*time.Second) } if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { @@ -161,6 +191,10 @@ func defaultConfig(config ClientConfig) ClientConfig { config.LogsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true) } + if !config.InternalMetricsEnabled { + config.InternalMetricsEnabled = true + } + if config.EarlyFlushPayloadSize == 0 { config.EarlyFlushPayloadSize = defaultEarlyFlushPayloadSize } @@ -169,6 +203,22 @@ func defaultConfig(config ClientConfig) ClientConfig { config.ExtendedHeartbeatInterval = defaultExtendedHeartbeatInterval } + if config.PayloadQueueSize.Min == 0 { + config.PayloadQueueSize.Min = defaultPayloadQueueSize.Min + } + + if config.PayloadQueueSize.Max == 0 { + config.PayloadQueueSize.Max = defaultPayloadQueueSize.Max + } + + if config.DistributionsSize.Min == 0 { + config.DistributionsSize.Min = distributionsSize.Min + } + + if config.DistributionsSize.Max == 0 { + 
config.DistributionsSize.Max = distributionsSize.Max + } + return config } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 14e4587bf2..42050b200c 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -886,7 +886,7 @@ func TestClientFlush(t *testing.T) { }, }, { - name: "full-distribution", + name: "distribution-overflow", when: func(c *client) { handler := c.Distribution(NamespaceGeneral, "init_time", nil) for i := 0; i < 1<<16; i++ { @@ -911,7 +911,9 @@ func TestClientFlush(t *testing.T) { t.Parallel() config := defaultConfig(test.clientConfig) config.AgentURL = "http://localhost:8126" - config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader + config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader + config.InternalMetricsEnabled = test.clientConfig.InternalMetricsEnabled // only enabled internal metrics when explicitly set + config.InternalMetricsEnabled = false c, err := newClient(tracerConfig, config) require.NoError(t, err) defer c.Close() @@ -934,15 +936,23 @@ func TestClientFlush(t *testing.T) { func TestMetricsDisabled(t *testing.T) { t.Setenv("DD_TELEMETRY_METRICS_ENABLED", "false") - client, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"}) + c, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{AgentURL: "http://localhost:8126"}) require.NoError(t, err) - defer client.Close() + recordWriter := &internal.RecordWriter{} + c.(*client).writer = recordWriter - assert.NotNil(t, client.Gauge(NamespaceTracers, "init_time", nil)) - assert.NotNil(t, client.Count(NamespaceTracers, "init_time", nil)) - assert.NotNil(t, client.Rate(NamespaceTracers, "init_time", nil)) - assert.NotNil(t, client.Distribution(NamespaceGeneral, "init_time", nil)) + defer c.Close() + + assert.NotNil(t, c.Gauge(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Count(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Rate(NamespaceTracers, "init_time", nil)) + assert.NotNil(t, c.Distribution(NamespaceGeneral, "init_time", nil)) + + c.Flush() + + payloads := recordWriter.Payloads() + require.Len(t, payloads, 0) } type testRoundTripper struct { @@ -1149,11 +1159,8 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushIntervalRange: struct { - Min time.Duration - Max time.Duration - }{Min: time.Second, Max: time.Second}, - AgentURL: "http://localhost:8126", + FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, + AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ @@ -1326,11 +1333,9 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushIntervalRange: struct { - Min time.Duration - Max time.Duration - }{Min: time.Second, Max: time.Second}, - AgentURL: "http://localhost:8126", + FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, + DistributionsSize: Range[int]{256, -1}, + AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 1ac85a52ee..63ab031623 100644 --- 
a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -29,7 +29,7 @@ type distributions struct { // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[string]string) MetricHandle { - if !knownmetrics.IsKnownMetricName(name) { + if !knownmetrics.IsKnownMetric(namespace, "distribution", name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ "The metric will still be sent.", name) } @@ -41,11 +41,6 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[s key := distributionKey{namespace: namespace, name: name, tags: strings.TrimSuffix(compiledTags, ",")} - // Max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. - // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. - // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. - // Which will bring our max throughput to 1100 points per second or about 750µs per request. - // TODO: tweak this value once we get telemetry data from the telemetry client handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](1<<8, 1<<14)}) return handle @@ -74,12 +69,12 @@ type distribution struct { values *internal.RingQueue[float64] } -var logLossOnce sync.Once +var distrLogLossOnce sync.Once func (d *distribution) Submit(value float64) { d.newSubmit.Store(true) if !d.values.Enqueue(value) { - logLossOnce.Do(func() { + distrLogLossOnce.Do(func() { log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) }) } @@ -99,7 +94,7 @@ func (d *distribution) payload() transport.DistributionSeries { Metric: d.key.name, Namespace: d.key.namespace, Tags: tags, - Common: knownmetrics.IsCommonMetricName(d.key.name), + Common: knownmetrics.IsCommonMetric(d.key.namespace, "distribution", d.key.name), Points: d.values.Flush(), } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index edad33e9b2..133011a110 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -10,8 +10,8 @@ import ( "sync/atomic" globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) var ( @@ -181,6 +181,8 @@ func AddBulkAppConfig(kvs map[string]any, origin Origin) { }) } +var globalClientLogLossOnce sync.Once + // globalClientCall takes a function that takes a Client and calls it with the global client if it exists. // otherwise, it records the action for when the client is started. 
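globalClientCall and the metricsHotPointer changes below both rely on the same record-then-replay idea: while no client exists yet, closures are pushed into a bounded Recorder and are either replayed in order once the client starts, or dropped with a one-time debug log when the queue is full. A self-contained stand-in for that pattern (not the actual Recorder implementation, which wraps the RingQueue shown earlier):

package main

import (
	"fmt"
	"strings"
)

// recorder mimics the generic Recorder[T] used by the global client: Record
// queues a closure, or reports failure when the queue is full, and Replay runs
// the queued closures in order against the value that finally becomes available.
type recorder[T any] struct{ queue []func(T) }

func (r *recorder[T]) Record(f func(T)) bool {
	if len(r.queue) >= 512 { // the real queue is a RingQueue capped at 512 entries
		return false
	}
	r.queue = append(r.queue, f)
	return true
}

func (r *recorder[T]) Replay(t T) {
	for _, f := range r.queue {
		f(t)
	}
	r.queue = nil
}

func main() {
	var r recorder[*strings.Builder]

	// Telemetry submitted before the client is started is only recorded.
	r.Record(func(b *strings.Builder) { b.WriteString("count tracers init_time 1\n") })
	r.Record(func(b *strings.Builder) { b.WriteString("gauge tracers init_time 2\n") })

	// Once the "client" exists, the backlog is replayed against it.
	var client strings.Builder
	r.Replay(&client)
	fmt.Print(client.String())
}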
func globalClientCall(fun func(client Client)) { @@ -190,9 +192,13 @@ func globalClientCall(fun func(client Client)) { client := globalClient.Load() if client == nil || *client == nil { - globalClientRecorder.Record(func(client Client) { + if !globalClientRecorder.Record(func(client Client) { fun(client) - }) + }) { + globalClientLogLossOnce.Do(func() { + log.Debug("telemetry: global client recorder queue is full, dropping telemetry data, please start the telemetry client earlier to avoid data loss") + }) + } return } @@ -207,6 +213,8 @@ func newMetricsHotPointer(maker func(client Client) MetricHandle) *metricsHotPoi return &metricsHandleHotPointers[len(metricsHandleHotPointers)-1] } +var metricLogLossOnce sync.Once + // metricsHotPointer is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. type metricsHotPointer struct { ptr atomic.Pointer[MetricHandle] @@ -221,18 +229,18 @@ func (t *metricsHotPointer) Submit(value float64) { inner := t.ptr.Load() if inner == nil || *inner == nil { - t.recorder.Record(func(handle MetricHandle) { + if !t.recorder.Record(func(handle MetricHandle) { handle.Submit(value) - }) + }) { + metricLogLossOnce.Do(func() { + log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss") + }) + } } (*inner).Submit(value) } -func (t *metricsHotPointer) payload() transport.MetricData { - return transport.MetricData{} -} - func (t *metricsHotPointer) swap(handle MetricHandle) { if t.ptr.Swap(&handle) == nil { t.recorder.Replay(handle) diff --git a/internal/newtelemetry/internal/knownmetrics/common_metrics.json b/internal/newtelemetry/internal/knownmetrics/common_metrics.json index f8d2dbfcea..8f79ae690d 100644 --- a/internal/newtelemetry/internal/knownmetrics/common_metrics.json +++ b/internal/newtelemetry/internal/knownmetrics/common_metrics.json @@ -1,177 +1,877 @@ [ - "code_coverage.errors", - "code_coverage.files", - "code_coverage.is_empty", - "code_coverage_finished", - "code_coverage_started", - "context_header.truncated", - "context_header_style.extracted", - "context_header_style.injected", - "docker_lib_injection.failure", - "docker_lib_injection.success", - "early_flake_detection.request", - "early_flake_detection.request_errors", - "early_flake_detection.request_ms", - "early_flake_detection.response_bytes", - "early_flake_detection.response_tests", - "endpoint_payload.bytes", - "endpoint_payload.dropped", - "endpoint_payload.events_count", - "endpoint_payload.events_serialization_ms", - "endpoint_payload.requests", - "endpoint_payload.requests_errors", - "endpoint_payload.requests_ms", - "evaluators.error", - "evaluators.init", - "evaluators.rule_sample_rate", - "evaluators.run", - "event_created", - "event_finished", - "events_enqueued_for_serialization", - "executed.propagation", - "executed.sink", - "executed.source", - "executed.tainted", - "exporter_fallback", - "flaky_tests.request", - "flaky_tests.request_errors", - "flaky_tests.request_ms", - "flaky_tests.response_bytes", - "flaky_tests.response_tests", - "git.command", - "git.command_errors", - "git.command_ms", - "git_requests.objects_pack", - "git_requests.objects_pack_bytes", - "git_requests.objects_pack_errors", - "git_requests.objects_pack_files", - "git_requests.objects_pack_ms", - "git_requests.search_commits", - "git_requests.search_commits_errors", - 
"git_requests.search_commits_ms", - "git_requests.settings", - "git_requests.settings_errors", - "git_requests.settings_ms", - "git_requests.settings_response", - "host_lib_injection.failure", - "host_lib_injection.success", - "impacted_tests_detection.request", - "impacted_tests_detection.request_errors", - "impacted_tests_detection.request_ms", - "impacted_tests_detection.response_bytes", - "impacted_tests_detection.response_files", - "init_time", - "inject.error", - "inject.language_detection", - "inject.latency.baseline", - "inject.latency.end_to_end", - "inject.latency.init_container", - "inject.skip", - "inject.success", - "injection.content_security_policy", - "injection.failed", - "injection.initialization.failed", - "injection.initialization.succeed", - "injection.installation", - "injection.installation.duration", - "injection.ms", - "injection.response.bytes", - "injection.skipped", - "injection.succeed", - "instrum.user_auth.missing_user_id", - "instrum.user_auth.missing_user_login", - "instrumented.propagation", - "instrumented.sink", - "instrumented.source", - "integration_errors", - "itr_forced_run", - "itr_skippable_tests.request", - "itr_skippable_tests.request_errors", - "itr_skippable_tests.request_ms", - "itr_skippable_tests.response_bytes", - "itr_skippable_tests.response_suites", - "itr_skippable_tests.response_tests", - "itr_skipped", - "itr_unskippable", - "json.tag.size.exceeded", - "k8s_lib_injection.failure", - "k8s_lib_injection.success", - "known_tests.request", - "known_tests.request_errors", - "known_tests.request_ms", - "known_tests.response_bytes", - "known_tests.response_tests", - "library_entrypoint.abort", - "library_entrypoint.abort.integration", - "library_entrypoint.abort.runtime", - "library_entrypoint.complete", - "library_entrypoint.error", - "library_entrypoint.injector.error", - "logs_created", - "manual_api_events", - "otel.env.hiding", - "otel.env.invalid", - "otel.env.unsupported", - "profile_api.bytes", - "profile_api.errors", - "profile_api.ms", - "profile_api.requests", - "profile_api.responses", - "rasp.rule.eval", - "rasp.rule.match", - "rasp.timeout", - "request.tainted", - "server.active_sessions", - "server.memory_usage", - "server.submitted_payloads", - "span.start", - "span_created", - "span_finished", - "span_pointer_calculation", - "span_pointer_calculation.issue", - "spans_created", - "spans_dropped", - "spans_enqueued_for_serialization", - "spans_finished", - "ssi_heuristic.number_of_profiles", - "ssi_heuristic.number_of_runtime_id", - "stats_api.bytes", - "stats_api.errors", - "stats_api.ms", - "stats_api.requests", - "stats_api.responses", - "stats_buckets", - "suppressed.vulnerabilities", - "telemetry_api.bytes", - "telemetry_api.errors", - "telemetry_api.ms", - "telemetry_api.requests", - "telemetry_api.responses", - "test_session", - "trace_api.bytes", - "trace_api.errors", - "trace_api.ms", - "trace_api.requests", - "trace_api.responses", - "trace_chunk_serialization.bytes", - "trace_chunk_serialization.ms", - "trace_chunk_size", - "trace_chunks_dropped", - "trace_chunks_enqueued", - "trace_chunks_enqueued_for_serialization", - "trace_chunks_sent", - "trace_partial_flush.count", - "trace_partial_flush.spans_closed", - "trace_partial_flush.spans_remaining", - "trace_segments_closed", - "trace_segments_created", - "tracer_init_time", - "waf.config_errors", - "waf.duration", - "waf.duration_ext", - "waf.init", - "waf.input_truncated", - "waf.requests", - "waf.truncated_value_size", - "waf.updates" + { + "namespace": "appsec", 
+ "type": "count", + "name": "instrum.user_auth.missing_user_id" + }, + { + "namespace": "appsec", + "type": "count", + "name": "instrum.user_auth.missing_user_login" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.rule.eval" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.rule.match" + }, + { + "namespace": "appsec", + "type": "count", + "name": "rasp.timeout" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.config_errors" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.init" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.input_truncated" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.requests" + }, + { + "namespace": "appsec", + "type": "count", + "name": "waf.updates" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.duration" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.duration_ext" + }, + { + "namespace": "appsec", + "type": "distribution", + "name": "waf.truncated_value_size" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage.is_empty" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "code_coverage_started" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "early_flake_detection.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.dropped" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "endpoint_payload.requests_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_created" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "event_finished" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "events_enqueued_for_serialization" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "flaky_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git.command_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.objects_pack_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.search_commits" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.search_commits_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "git_requests.settings_response" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "impacted_tests_detection.request_errors" + }, + { + "namespace": "civisibility", + 
"type": "count", + "name": "itr_forced_run" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_suites" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skippable_tests.response_tests" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_skipped" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "itr_unskippable" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "known_tests.request_errors" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "manual_api_events" + }, + { + "namespace": "civisibility", + "type": "count", + "name": "test_session" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "code_coverage.files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "early_flake_detection.response_tests" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_count" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.events_serialization_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "endpoint_payload.requests_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "flaky_tests.response_tests" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git.command_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.objects_pack_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.search_commits_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "git_requests.settings_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "impacted_tests_detection.response_files" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "itr_skippable_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "itr_skippable_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.request_ms" + }, + { + "namespace": "civisibility", + "type": "distribution", + 
"name": "known_tests.response_bytes" + }, + { + "namespace": "civisibility", + "type": "distribution", + "name": "known_tests.response_tests" + }, + { + "namespace": "general", + "type": "count", + "name": "logs_created" + }, + { + "namespace": "general", + "type": "distribution", + "name": "init_time" + }, + { + "namespace": "general", + "type": "distribution", + "name": "tracer_init_time" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.source" + }, + { + "namespace": "iast", + "type": "count", + "name": "executed.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.propagation" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.sink" + }, + { + "namespace": "iast", + "type": "count", + "name": "instrumented.source" + }, + { + "namespace": "iast", + "type": "count", + "name": "json.tag.size.exceeded" + }, + { + "namespace": "iast", + "type": "count", + "name": "request.tainted" + }, + { + "namespace": "iast", + "type": "count", + "name": "suppressed.vulnerabilities" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.error" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.init" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "evaluators.run" + }, + { + "namespace": "mlobs", + "type": "count", + "name": "span.start" + }, + { + "namespace": "mlobs", + "type": "distribution", + "name": "evaluators.rule_sample_rate" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.errors" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.requests" + }, + { + "namespace": "profilers", + "type": "count", + "name": "profile_api.responses" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_profiles" + }, + { + "namespace": "profilers", + "type": "count", + "name": "ssi_heuristic.number_of_runtime_id" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.bytes" + }, + { + "namespace": "profilers", + "type": "distribution", + "name": "profile_api.ms" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.content_security_policy" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.initialization.failed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.initialization.succeed" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.installation" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.skipped" + }, + { + "namespace": "rum", + "type": "count", + "name": "injection.succeed" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.installation.duration" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.ms" + }, + { + "namespace": "rum", + "type": "distribution", + "name": "injection.response.bytes" + }, + { + "namespace": "sidecar", + "type": "count", + "name": "server.submitted_payloads" + }, + { + "namespace": "sidecar", + "type": "distribution", + "name": "server.memory_usage" + }, + { + "namespace": "sidecar", + "type": "gauge", + "name": "server.active_sessions" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.errors" + }, + { + 
"namespace": "telemetry", + "type": "count", + "name": "telemetry_api.requests" + }, + { + "namespace": "telemetry", + "type": "count", + "name": "telemetry_api.responses" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.bytes" + }, + { + "namespace": "telemetry", + "type": "distribution", + "name": "telemetry_api.ms" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header.truncated" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.extracted" + }, + { + "namespace": "tracers", + "type": "count", + "name": "context_header_style.injected" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "docker_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "exporter_fallback" + }, + { + "namespace": "tracers", + "type": "count", + "name": "host_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "host_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.language_detection" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.skip" + }, + { + "namespace": "tracers", + "type": "count", + "name": "inject.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "integration_errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.failure" + }, + { + "namespace": "tracers", + "type": "count", + "name": "k8s_lib_injection.success" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.integration" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.abort.runtime" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.complete" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "library_entrypoint.injector.error" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.hiding" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.invalid" + }, + { + "namespace": "tracers", + "type": "count", + "name": "otel.env.unsupported" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation" + }, + { + "namespace": "tracers", + "type": "count", + "name": "span_pointer_calculation.issue" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_created" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_dropped" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "spans_finished" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "stats_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": 
"trace_api.errors" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.requests" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_api.responses" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_dropped" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_enqueued_for_serialization" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_chunks_sent" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_partial_flush.count" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_closed" + }, + { + "namespace": "tracers", + "type": "count", + "name": "trace_segments_created" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.baseline" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.end_to_end" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "inject.latency.init_container" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "stats_api.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_api.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_serialization.bytes" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_serialization.ms" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_chunk_size" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_closed" + }, + { + "namespace": "tracers", + "type": "distribution", + "name": "trace_partial_flush.spans_remaining" + }, + { + "namespace": "tracers", + "type": "gauge", + "name": "stats_buckets" + } ] diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index 031cddecdc..71504f31c5 100644 --- a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -20,6 +20,9 @@ import ( "strings" "golang.org/x/exp/slices" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) // This represents the base64-encoded URL of api.github.com to download the configuration file. 
@@ -34,7 +37,7 @@ func base64Decode(encoded string) string { return string(decoded) } -func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []string) error { +func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames func(map[string]any) []knownmetrics.Declaration) error { request, err := http.NewRequest(http.MethodGet, remoteURL, nil) if err != nil { return err @@ -66,7 +69,15 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames } metricNames := getMetricNames(decoded) - slices.SortStableFunc(metricNames, strings.Compare) + slices.SortStableFunc(metricNames, func(i, j knownmetrics.Declaration) int { + if i.Namespace != j.Namespace { + return strings.Compare(string(i.Namespace), string(j.Namespace)) + } + if i.Type != j.Type { + return strings.Compare(i.Type, j.Type) + } + return strings.Compare(i.Name, j.Name) + }) fp, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { @@ -80,8 +91,8 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames return encoder.Encode(metricNames) } -func getCommonMetricNames(input map[string]any) []string { - var names []string +func getCommonMetricNames(input map[string]any) []knownmetrics.Declaration { + var names []knownmetrics.Declaration for category, value := range input { if strings.HasPrefix(category, "$") { continue @@ -89,10 +100,16 @@ func getCommonMetricNames(input map[string]any) []string { metrics := value.(map[string]any) for metricKey, value := range metrics { - names = append(names, metricKey) + metric := knownmetrics.Declaration{ + Namespace: transport.Namespace(category), + Name: metricKey, + Type: value.(map[string]any)["metric_type"].(string), + } + names = append(names, metric) if aliases, ok := value.(map[string]any)["aliases"]; ok { for _, alias := range aliases.([]any) { - names = append(names, alias.(string)) + metric.Name = alias.(string) + names = append(names, metric) } } } @@ -100,13 +117,16 @@ func getCommonMetricNames(input map[string]any) []string { return names } -func getGoMetricNames(input map[string]any) []string { - var names []string - for key := range input { +func getGoMetricNames(input map[string]any) []knownmetrics.Declaration { + var names []knownmetrics.Declaration + for key, value := range input { if strings.HasPrefix(key, "$") { continue } - names = append(names, key) + names = append(names, knownmetrics.Declaration{ + Name: key, + Type: value.(map[string]any)["metric_type"].(string), + }) } return names } diff --git a/internal/newtelemetry/internal/knownmetrics/golang_metrics.json b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json index 54396b6e4a..c2c581bf85 100644 --- a/internal/newtelemetry/internal/knownmetrics/golang_metrics.json +++ b/internal/newtelemetry/internal/knownmetrics/golang_metrics.json @@ -1,3 +1,7 @@ [ - "orchestrion.enabled" + { + "namespace": "", + "type": "gauge", + "name": "orchestrion.enabled" + } ] diff --git a/internal/newtelemetry/internal/knownmetrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go index 858c1918ac..14a9991752 100644 --- a/internal/newtelemetry/internal/knownmetrics/known_metrics.go +++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go @@ -13,6 +13,7 @@ import ( "slices" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) //go:embed common_metrics.json @@ -21,33 +22,42 @@ 
var commonMetricsJSON []byte //go:embed golang_metrics.json var golangMetricsJSON []byte +type Declaration struct { + Namespace transport.Namespace `json:"namespace"` + Type string `json:"type"` + Name string `json:"name"` +} + var ( commonMetrics = parseMetricNames(commonMetricsJSON) golangMetrics = parseMetricNames(golangMetricsJSON) ) -func parseMetricNames(bytes []byte) []string { - var names []string +func parseMetricNames(bytes []byte) []Declaration { + var names []Declaration if err := json.Unmarshal(bytes, &names); err != nil { log.Error("telemetry: failed to parse metric names: %v", err) } return names } -// IsKnownMetricName returns true if the given metric name is a known metric by the backend +// IsKnownMetric returns true if the given metric name is a known metric by the backend // This is linked to generated common_metrics.json file and golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsKnownMetricName(name string) bool { - return slices.Contains(commonMetrics, name) || slices.Contains(golangMetrics, name) +func IsKnownMetric(namespace transport.Namespace, typ, name string) bool { + decl := Declaration{Namespace: namespace, Type: typ, Name: name} + return slices.Contains(commonMetrics, decl) || slices.Contains(golangMetrics, decl) } -// IsCommonMetricName returns true if the given metric name is a known common (cross-language) metric by the backend +// IsCommonMetric returns true if the given metric name is a known common (cross-language) metric by the backend // This is linked to the generated common_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsCommonMetricName(name string) bool { - return slices.Contains(commonMetrics, name) +func IsCommonMetric(namespace transport.Namespace, typ, name string) bool { + decl := Declaration{Namespace: namespace, Type: typ, Name: name} + return slices.Contains(commonMetrics, decl) } -// IsLanguageMetricName returns true if the given metric name is a known Go language metric by the backend +// IsLanguageMetric returns true if the given metric name is a known Go language metric by the backend // This is linked to the generated golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsLanguageMetricName(name string) bool { - return slices.Contains(golangMetrics, name) +func IsLanguageMetric(typ, name string) bool { + decl := Declaration{Type: typ, Name: name} + return slices.Contains(golangMetrics, decl) } diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index edfb843524..f7b1aa8793 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -5,17 +5,13 @@ package internal -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/log" -) - // Recorder is a generic thread-safe type that records functions that could have taken place before object T was created. // Once object T is created, the Recorder can replay all the recorded functions with object T as an argument. type Recorder[T any] struct { queue *RingQueue[func(T)] } -// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions. +// NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions before overflowing. 
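
Aside (illustrative sketch, not part of the patch): the Recorder exists so that calls made before the real telemetry client is constructed can be queued and replayed later. The fragment below uses strings.Builder purely as a stand-in for the client type, and assumes the usual fmt/strings imports:

    rec := NewRecorder[*strings.Builder]()

    // Before the target exists: record work for later. With the change below,
    // a full queue is reported through the boolean return instead of a debug log.
    ok := rec.Record(func(b *strings.Builder) { b.WriteString("recorded early\n") })
    _ = ok

    // Once the target is available, replay every recorded function in order.
    var b strings.Builder
    rec.Replay(&b)
    fmt.Print(b.String()) // recorded early
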
func NewRecorder[T any]() Recorder[T] { return Recorder[T]{ // TODO: tweak this value once we get telemetry data from the telemetry client @@ -23,13 +19,13 @@ func NewRecorder[T any]() Recorder[T] { } } -func (r Recorder[T]) Record(f func(T)) { +// Record takes a function and records it in the Recorder's queue. If the queue is full, it returns false. +// Once Replay is called, all recorded functions will be replayed with object T as an argument in order of recording. +func (r Recorder[T]) Record(f func(T)) bool { if r.queue == nil { - return - } - if !r.queue.Enqueue(f) { - log.Debug("telemetry: recorder queue is full, dropping record") + return true } + return r.queue.Enqueue(f) } func (r Recorder[T]) Replay(t T) { diff --git a/internal/newtelemetry/internal/transport/namespace.go b/internal/newtelemetry/internal/transport/namespace.go index 9c4dfe1f37..02d94aca19 100644 --- a/internal/newtelemetry/internal/transport/namespace.go +++ b/internal/newtelemetry/internal/transport/namespace.go @@ -8,10 +8,13 @@ package transport type Namespace string const ( - NamespaceGeneral Namespace = "general" - NamespaceTracers Namespace = "tracers" - NamespaceProfilers Namespace = "profilers" - NamespaceAppSec Namespace = "appsec" - NamespaceIAST Namespace = "iast" - NamespaceTelemetry Namespace = "telemetry" + NamespaceGeneral Namespace = "general" + NamespaceTracers Namespace = "tracers" + NamespaceProfilers Namespace = "profilers" + NamespaceAppSec Namespace = "appsec" + NamespaceIAST Namespace = "iast" + NamespaceTelemetry Namespace = "telemetry" + NamespaceCIVisibility Namespace = "civisibility" + NamespaceMLOps Namespace = "mlops" + NamespaceRUM Namespace = "rum" ) diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index e2302c9929..34b64759f0 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -98,6 +98,8 @@ type EndpointRequestResult struct { PayloadByteSize int // CallDuration is the duration of the call to the endpoint if the call was successful CallDuration time.Duration + // StatusCode is the status code of the response from the endpoint even if the call failed but only with an actual HTTP error + StatusCode int } type writer struct { @@ -257,7 +259,7 @@ func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, erro results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ StatusCode: response.Status, Body: string(respBodyBytes), - }}) + }, StatusCode: response.StatusCode}) continue } diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index fe2142c0d5..7a160d7032 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -37,7 +37,7 @@ type metrics struct { // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { - if !knownmetrics.IsKnownMetricName(name) { + if !knownmetrics.IsKnownMetric(namespace, string(kind), name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. 
"+ "The metric will still be sent.", name) } @@ -108,7 +108,7 @@ func (c *metric) payload() transport.MetricData { Namespace: c.key.namespace, Tags: tags, Type: c.key.kind, - Common: knownmetrics.IsCommonMetricName(c.key.name), + Common: knownmetrics.IsCommonMetric(c.key.namespace, string(c.key.kind), c.key.name), Points: [][2]any{ {c.submitTime.Load(), c.value.Load()}, }, From 2cfb4297149d543c0c9e777f05d647a131afde4c Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 29 Jan 2025 18:08:45 +0100 Subject: [PATCH 31/61] remove new dependencies that were introduced inadvertently Signed-off-by: Eliott Bouhana --- .../knownmetrics/generator/generator.go | 3 +-- internal/newtelemetry/metrics.go | 26 +++++++++++++------ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index 71504f31c5..e0283cca0b 100644 --- a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -17,10 +17,9 @@ import ( "os/exec" "path/filepath" "runtime" + "slices" "strings" - "golang.org/x/exp/slices" - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 7a160d7032..2b5954e2e2 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -6,11 +6,11 @@ package newtelemetry import ( + "math" "strings" + "sync/atomic" "time" - "go.uber.org/atomic" - "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/knownmetrics" @@ -84,7 +84,7 @@ type metric struct { // Values set during Submit() newSubmit atomic.Bool - value atomic.Float64 + value atomic.Uint64 // Actually a float64, but we need atomic operations submitTime atomic.Int64 } @@ -110,20 +110,30 @@ func (c *metric) payload() transport.MetricData { Type: c.key.kind, Common: knownmetrics.IsCommonMetric(c.key.namespace, string(c.key.kind), c.key.name), Points: [][2]any{ - {c.submitTime.Load(), c.value.Load()}, + {c.submitTime.Load(), math.Float64frombits(c.value.Load())}, }, } return data } +func (c *metric) add(value float64) { + for { + oldValue := math.Float64frombits(c.value.Load()) + newValue := oldValue + value + if c.value.CompareAndSwap(math.Float64bits(oldValue), math.Float64bits(newValue)) { + return + } + } +} + type count struct { metric } func (c *count) Submit(value float64) { c.submit() - c.value.Add(value) + c.add(value) } type gauge struct { @@ -132,7 +142,7 @@ type gauge struct { func (c *gauge) Submit(value float64) { c.submit() - c.value.Store(value) + c.value.Store(math.Float64bits(value)) } type rate struct { @@ -142,7 +152,7 @@ type rate struct { func (c *rate) Submit(value float64) { c.submit() - c.value.Add(value) + c.add(value) } func (c *rate) payload() transport.MetricData { @@ -152,6 +162,6 @@ func (c *rate) payload() transport.MetricData { } payload.Interval = int64(time.Since(c.intervalStart).Seconds()) - payload.Points[0][1] = c.value.Load() / float64(payload.Interval) + payload.Points[0][1] = math.Float64frombits(c.value.Load()) / float64(payload.Interval) return payload } From 5380ba3529e4e90ca22216f97d7c11363eacfb53 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 
Jan 2025 13:03:16 +0100 Subject: [PATCH 32/61] renamed several things added send failure tests and reworked the ringqueue ground up Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 3 + internal/newtelemetry/client.go | 25 +- internal/newtelemetry/client_config.go | 6 +- internal/newtelemetry/client_test.go | 228 +++++++++++------- internal/newtelemetry/dependencies.go | 2 +- internal/newtelemetry/distributions.go | 7 +- internal/newtelemetry/globalclient.go | 9 + internal/newtelemetry/internal/ringbuffer.go | 155 +++++++----- .../newtelemetry/internal/ringbuffer_test.go | 155 ++++++++++++ .../newtelemetry/internal/transport/body.go | 6 +- internal/newtelemetry/internal/writer.go | 13 +- internal/newtelemetry/internal/writer_test.go | 23 +- internal/newtelemetry/log/log.go | 47 ++++ internal/newtelemetry/metrics.go | 73 +++--- 14 files changed, 540 insertions(+), 212 deletions(-) create mode 100644 internal/newtelemetry/internal/ringbuffer_test.go create mode 100644 internal/newtelemetry/log/log.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index ca7159befc..8a73c598dc 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -45,7 +45,10 @@ const ( // telemetry.Metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) // ``` type MetricHandle interface { + // Submit submits a value to the metric handle. Submit(value float64) + // Get returns the last value submitted to the metric handle. + Get() float64 } // Integration is an integration that is configured to be traced. diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 8c1b879982..36d4ac5cfc 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -65,6 +65,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client }, distributions: distributions{ skipAllowlist: config.Debug, + queueSize: config.DistributionsSize, }, } @@ -109,11 +110,11 @@ type client struct { flushTicker *internal.Ticker - // flushMapper is the transformer to use for the next flush on the gathered payloads on this tick + // flushMapper is the transformer to use for the next flush on the gathered bodies on this tick flushMapper mapper.Mapper flushMapperMu sync.Mutex - // payloadQueue is used when we cannot flush previously built payloads + // payloadQueue is used when we cannot flush previously built bodies payloadQueue *internal.RingQueue[transport.Payload] } @@ -133,6 +134,10 @@ type noopMetricHandle struct{} func (noopMetricHandle) Submit(_ float64) {} +func (noopMetricHandle) Get() float64 { + return 0 +} + func (c *client) Count(namespace Namespace, name string, tags map[string]string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} @@ -199,9 +204,9 @@ func (c *client) Flush() { } // flush sends all the data sources to the writer by let them flow through the given transformer function. -// The transformer function is used to transform the payloads before sending them to the writer. +// The transformer function is used to transform the bodies before sending them to the writer. 
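
Aside (not part of the patch): the metrics change in the previous commit drops go.uber.org/atomic by storing the float64 value in a sync/atomic.Uint64 and adding to it with a compare-and-swap loop over math.Float64bits, equivalent to the add helper above. A minimal standalone sketch of that pattern:

    package main

    import (
        "fmt"
        "math"
        "sync/atomic"
    )

    type atomicFloat64 struct{ bits atomic.Uint64 }

    func (f *atomicFloat64) Add(delta float64) {
        for {
            old := f.bits.Load()
            next := math.Float64bits(math.Float64frombits(old) + delta)
            if f.bits.CompareAndSwap(old, next) { // retry if another goroutine won the race
                return
            }
        }
    }

    func (f *atomicFloat64) Load() float64 { return math.Float64frombits(f.bits.Load()) }

    func main() {
        var f atomicFloat64
        f.Add(1.5)
        f.Add(2.5)
        fmt.Println(f.Load()) // 4
    }
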
func (c *client) flush(payloads []transport.Payload) (int, error) { - // Transform the payloads + // Transform the bodies { c.flushMapperMu.Lock() payloads, c.flushMapper = c.flushMapper.Transform(payloads) @@ -213,9 +218,9 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return 0, nil } + // We enqueue the new payloads to preserve the order of the payloads c.payloadQueue.Enqueue(payloads...) - payloads = c.payloadQueue.GetBuffer() - defer c.payloadQueue.ReleaseBuffer(payloads) + payloads = c.payloadQueue.Flush() var ( nbBytes int @@ -224,14 +229,10 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { ) for i, payload := range payloads { - if payload == nil { - continue - } - results, err := c.writer.Flush(payload) c.computeFlushMetrics(results, err) if err != nil { - // We stop flushing when we encounter a fatal error, put the payloads in the queue and return the error + // We stop flushing when we encounter a fatal error, put the bodies in the queue and return the error log.Error("error while flushing telemetry data: %v", err) c.payloadQueue.Enqueue(payloads[i:]...) return nbBytes, err @@ -241,7 +242,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { successfulCall := results[len(results)-1] if !speedIncreased && successfulCall.PayloadByteSize > c.clientConfig.EarlyFlushPayloadSize { - // We increase the speed of the flushTicker to try to flush the remaining payloads faster as we are at risk of sending too large payloads to the backend + // We increase the speed of the flushTicker to try to flush the remaining bodies faster as we are at risk of sending too large bodies to the backend c.flushTicker.IncreaseSpeed() speedIncreased = true } diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index bbee82d61a..a7dc4f0a0c 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -84,8 +84,8 @@ type ClientConfig struct { APIKey string // EarlyFlushPayloadSize is the size of the payload that will trigger an early flush. - // This is necessary because backend won't allow payloads larger than 5MB. - // The default value here will be 2MB to take into account the large inaccuracy in estimating the size of payloads + // This is necessary because backend won't allow bodies larger than 5MB. 
+ // The default value here will be 2MB to take into account the large inaccuracy in estimating the size of bodies EarlyFlushPayloadSize int } @@ -116,7 +116,7 @@ var ( // TODO: tweak this value once we get real telemetry data from the telemetry client defaultPayloadQueueSize = Range[int]{ Min: 4, - Max: 21, + Max: 32, } // TODO: tweak this value once we get telemetry data from the telemetry client diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 42050b200c..af568ccfbd 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -6,9 +6,11 @@ package newtelemetry import ( + "bytes" "encoding/json" "errors" "fmt" + "io" "net/http" "runtime" "runtime/debug" @@ -956,13 +958,60 @@ func TestMetricsDisabled(t *testing.T) { } type testRoundTripper struct { + t *testing.T roundTrip func(*http.Request) (*http.Response, error) + bodies []transport.Body } func (t *testRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + defer req.Body.Close() + body, err := io.ReadAll(req.Body) + require.NoError(t.t, err) + req.Body = io.NopCloser(bytes.NewReader(body)) + t.bodies = append(t.bodies, parseRequest(t.t, req.Header, body)) return t.roundTrip(req) } +func parseRequest(t *testing.T, headers http.Header, raw []byte) transport.Body { + t.Helper() + + assert.Equal(t, "v2", headers.Get("DD-Telemetry-API-Version")) + assert.Equal(t, "application/json", headers.Get("Content-Type")) + assert.Equal(t, "go", headers.Get("DD-Client-Library-Language")) + assert.Equal(t, "test-env", headers.Get("DD-Agent-Env")) + assert.Equal(t, version.Tag, headers.Get("DD-Client-Library-Version")) + assert.Equal(t, globalconfig.InstrumentationInstallID(), headers.Get("DD-Agent-Install-Id")) + assert.Equal(t, globalconfig.InstrumentationInstallType(), headers.Get("DD-Agent-Install-Type")) + assert.Equal(t, globalconfig.InstrumentationInstallTime(), headers.Get("DD-Agent-Install-Time")) + + assert.NotEmpty(t, headers.Get("DD-Agent-Hostname")) + + var body transport.Body + require.NoError(t, json.Unmarshal(raw, &body)) + + assert.Equal(t, string(body.RequestType), headers.Get("DD-Telemetry-Request-Type")) + assert.Equal(t, "test-service", body.Application.ServiceName) + assert.Equal(t, "test-env", body.Application.Env) + assert.Equal(t, "1.0.0", body.Application.ServiceVersion) + assert.Equal(t, "go", body.Application.LanguageName) + assert.Equal(t, runtime.Version(), body.Application.LanguageVersion) + + assert.NotEmpty(t, body.Host.Hostname) + assert.Equal(t, osinfo.OSName(), body.Host.OS) + assert.Equal(t, osinfo.OSVersion(), body.Host.OSVersion) + assert.Equal(t, osinfo.Architecture(), body.Host.Architecture) + assert.Equal(t, osinfo.KernelName(), body.Host.KernelName) + assert.Equal(t, osinfo.KernelRelease(), body.Host.KernelRelease) + assert.Equal(t, osinfo.KernelVersion(), body.Host.KernelVersion) + + assert.Equal(t, "v2", body.APIVersion) + assert.NotZero(t, body.TracerTime) + assert.LessOrEqual(t, int64(1), body.SeqID) + assert.Equal(t, globalconfig.RuntimeID(), body.RuntimeID) + + return body +} + func TestClientEnd2End(t *testing.T) { tracerConfig := internal.TracerConfig{ Service: "test-service", @@ -970,63 +1019,20 @@ func TestClientEnd2End(t *testing.T) { Version: "1.0.0", } - parseRequest := func(t *testing.T, request *http.Request) transport.Body { - assert.Equal(t, "v2", request.Header.Get("DD-Telemetry-API-Version")) - assert.Equal(t, "application/json", request.Header.Get("Content-Type")) - assert.Equal(t, "go", 
request.Header.Get("DD-Client-Library-Language")) - assert.Equal(t, "test-env", request.Header.Get("DD-Agent-Env")) - assert.Equal(t, version.Tag, request.Header.Get("DD-Client-Library-Version")) - assert.Equal(t, globalconfig.InstrumentationInstallID(), request.Header.Get("DD-Agent-Install-Id")) - assert.Equal(t, globalconfig.InstrumentationInstallType(), request.Header.Get("DD-Agent-Install-Type")) - assert.Equal(t, globalconfig.InstrumentationInstallTime(), request.Header.Get("DD-Agent-Install-Time")) - assert.Equal(t, "true", request.Header.Get("DD-Telemetry-Debug-Enabled")) - - assert.NotEmpty(t, request.Header.Get("DD-Agent-Hostname")) - - var body transport.Body - require.NoError(t, json.NewDecoder(request.Body).Decode(&body)) - - assert.Equal(t, string(body.RequestType), request.Header.Get("DD-Telemetry-Request-Type")) - assert.Equal(t, "test-service", body.Application.ServiceName) - assert.Equal(t, "test-env", body.Application.Env) - assert.Equal(t, "1.0.0", body.Application.ServiceVersion) - assert.Equal(t, "go", body.Application.LanguageName) - assert.Equal(t, runtime.Version(), body.Application.LanguageVersion) - - assert.NotEmpty(t, body.Host.Hostname) - assert.Equal(t, osinfo.OSName(), body.Host.OS) - assert.Equal(t, osinfo.OSVersion(), body.Host.OSVersion) - assert.Equal(t, osinfo.Architecture(), body.Host.Architecture) - assert.Equal(t, osinfo.KernelName(), body.Host.KernelName) - assert.Equal(t, osinfo.KernelRelease(), body.Host.KernelRelease) - assert.Equal(t, osinfo.KernelVersion(), body.Host.KernelVersion) - - assert.Equal(t, true, body.Debug) - assert.Equal(t, "v2", body.APIVersion) - assert.NotZero(t, body.TracerTime) - assert.LessOrEqual(t, int64(1), body.SeqID) - assert.Equal(t, globalconfig.RuntimeID(), body.RuntimeID) - - return body - } - for _, test := range []struct { - name string - when func(*client) - expect func(*testing.T, *http.Request) (*http.Response, error) + name string + when func(*client) + roundtrip func(*testing.T, *http.Request) (*http.Response, error) + expect func(*testing.T, []transport.Body) }{ { name: "app-start", when: func(c *client) { c.appStart() }, - expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - assert.Equal(t, string(transport.RequestTypeAppStarted), request.Header.Get("DD-Telemetry-Request-Type")) - body := parseRequest(t, request) - assert.Equal(t, transport.RequestTypeAppStarted, body.RequestType) - return &http.Response{ - StatusCode: http.StatusOK, - }, nil + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) }, }, { @@ -1034,13 +1040,9 @@ func TestClientEnd2End(t *testing.T) { when: func(c *client) { c.appStop() }, - expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - assert.Equal(t, string(transport.RequestTypeAppClosing), request.Header.Get("DD-Telemetry-Request-Type")) - body := parseRequest(t, request) - assert.Equal(t, transport.RequestTypeAppClosing, body.RequestType) - return &http.Response{ - StatusCode: http.StatusOK, - }, nil + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeAppClosing, bodies[0].RequestType) }, }, { @@ -1049,43 +1051,61 @@ func TestClientEnd2End(t *testing.T) { c.appStart() c.appStop() }, - expect: func(t *testing.T, request *http.Request) (*http.Response, error) { - body := parseRequest(t, request) - - switch request.Header.Get("DD-Telemetry-Request-Type") { - case 
string(transport.RequestTypeAppStarted): - payload := body.Payload.(*transport.AppStarted) - assert.Equal(t, globalconfig.InstrumentationInstallID(), payload.InstallSignature.InstallID) - assert.Equal(t, globalconfig.InstrumentationInstallType(), payload.InstallSignature.InstallType) - assert.Equal(t, globalconfig.InstrumentationInstallTime(), payload.InstallSignature.InstallTime) - case string(transport.RequestTypeAppClosing): - - default: - t.Fatalf("unexpected request type: %s", request.Header.Get("DD-Telemetry-Request-Type")) + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + assert.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + assert.Equal(t, transport.RequestTypeAppClosing, bodies[1].RequestType) + }, + }, + { + name: "fail-agent-endpoint", + when: func(c *client) { + c.appStart() + }, + roundtrip: func(_ *testing.T, req *http.Request) (*http.Response, error) { + if strings.Contains(req.URL.Host, "localhost") { + return nil, errors.New("failed") } - - return &http.Response{ - StatusCode: http.StatusOK, - }, nil + return &http.Response{StatusCode: http.StatusOK}, nil + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + require.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + require.Equal(t, transport.RequestTypeAppStarted, bodies[1].RequestType) + }, + }, + { + name: "fail-all-endpoint", + when: func(c *client) { + c.appStart() + }, + roundtrip: func(_ *testing.T, _ *http.Request) (*http.Response, error) { + return nil, errors.New("failed") + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 2) + require.Equal(t, transport.RequestTypeAppStarted, bodies[0].RequestType) + require.Equal(t, transport.RequestTypeAppStarted, bodies[1].RequestType) }, }, } { t.Run(test.name, func(t *testing.T) { t.Parallel() + rt := &testRoundTripper{ + t: t, + roundTrip: func(req *http.Request) (*http.Response, error) { + if test.roundtrip != nil { + return test.roundtrip(t, req) + } + return &http.Response{StatusCode: http.StatusOK}, nil + }, + } clientConfig := ClientConfig{ AgentURL: "http://localhost:8126", + APIKey: "apikey", HTTPClient: &http.Client{ - Timeout: 5 * time.Second, - Transport: &testRoundTripper{ - roundTrip: func(req *http.Request) (*http.Response, error) { - if test.expect != nil { - return test.expect(t, req) - } - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - }, + Timeout: 5 * time.Second, + Transport: rt, }, Debug: true, } @@ -1099,10 +1119,48 @@ func TestClientEnd2End(t *testing.T) { test.when(c) c.Flush() + test.expect(t, rt.bodies) }) } } +func TestSendingFailures(t *testing.T) { + cfg := ClientConfig{ + AgentURL: "http://localhost:8126", + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + t: t, + roundTrip: func(_ *http.Request) (*http.Response, error) { + return nil, errors.New("failed") + }, + }, + }, + } + + c, err := newClient(internal.TracerConfig{ + Service: "test-service", + Env: "test-env", + Version: "1.0.0", + }, defaultConfig(cfg)) + + require.NoError(t, err) + defer c.Close() + + c.Log(LogError, "test") + c.Flush() + + require.False(t, c.payloadQueue.IsEmpty()) + payload := c.payloadQueue.ReversePeek() + require.NotNil(t, payload) + + assert.Equal(t, transport.RequestTypeLogs, payload.RequestType()) + logs := payload.(transport.Logs) + assert.Len(t, logs.Logs, 1) + assert.Equal(t, transport.LogLevelError, logs.Logs[0].Level) + assert.Equal(t, "test", 
logs.Logs[0].Message) +} + func BenchmarkLogs(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, diff --git a/internal/newtelemetry/dependencies.go b/internal/newtelemetry/dependencies.go index d830816ab6..b23d746083 100644 --- a/internal/newtelemetry/dependencies.go +++ b/internal/newtelemetry/dependencies.go @@ -36,7 +36,7 @@ func (d *dependencies) Payload() transport.Payload { // Requirement described here: // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-dependencies-loaded if len(deps) > 2000 { - log.Debug("telemetry: too many (%d) dependencies to send, sending over multiple payloads", len(deps)) + log.Debug("telemetry: too many (%d) dependencies to send, sending over multiple bodies", len(deps)) } for i := 0; i < len(deps); i += 2000 { diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 63ab031623..1c1d7a5a6e 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -25,6 +25,7 @@ type distributionKey struct { type distributions struct { store internal.TypedSyncMap[distributionKey, *distribution] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics + queueSize Range[int] } // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. @@ -41,7 +42,7 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[s key := distributionKey{namespace: namespace, name: name, tags: strings.TrimSuffix(compiledTags, ",")} - handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](1<<8, 1<<14)}) + handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](d.queueSize.Min, d.queueSize.Max)}) return handle } @@ -80,6 +81,10 @@ func (d *distribution) Submit(value float64) { } } +func (d *distribution) Get() float64 { + return d.values.ReversePeek() +} + func (d *distribution) payload() transport.DistributionSeries { if submit := d.newSubmit.Swap(false); !submit { return transport.DistributionSeries{} diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 133011a110..53cf41e724 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -241,6 +241,15 @@ func (t *metricsHotPointer) Submit(value float64) { (*inner).Submit(value) } +func (t *metricsHotPointer) Get() float64 { + inner := t.ptr.Load() + if inner == nil || *inner == nil { + return 0 + } + + return (*inner).Get() +} + func (t *metricsHotPointer) swap(handle MetricHandle) { if t.ptr.Swap(&handle) == nil { t.recorder.Replay(handle) diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 3654c14fde..622865cb64 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -11,12 +11,8 @@ import ( // RingQueue is a thread-safe ring buffer can be used to store a fixed number of elements and overwrite old values when full. type RingQueue[T any] struct { - // buffer is the slice that contains the data. - buffer []T - // head is the index of the first element in the buffer. - head int - // tail is the index of the last element in the buffer. - tail int + buffer []T + head, tail, count int // mu is the lock for the buffer, head and tail. 
mu sync.Mutex // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. @@ -35,97 +31,130 @@ func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { } } -// Enqueue adds one or multiple values to the buffer. returns false if the buffer is full. -func (rb *RingQueue[T]) Enqueue(vals ...T) bool { - rb.mu.Lock() - defer rb.mu.Unlock() +// Length returns the number of elements currently stored in the queue. +func (rq *RingQueue[T]) Length() int { + return rq.count +} + +func (rq *RingQueue[T]) resizeLocked() { + newBuf := make([]T, min(rq.count*2, rq.maxBufferSize)) + + if rq.tail > rq.head { + copy(newBuf, rq.buffer[rq.head:rq.tail]) + } else { + n := copy(newBuf, rq.buffer[rq.head:]) + copy(newBuf[n:], rq.buffer[:rq.tail]) + } + + rq.head = 0 + rq.tail = rq.count + rq.buffer = newBuf +} + +func (rq *RingQueue[T]) enqueueLocked(elem T) bool { spaceLeft := true - for _, val := range vals { - spaceLeft = rb.enqueueLocked(val) + if rq.count == len(rq.buffer) { + if len(rq.buffer) == rq.maxBufferSize { + spaceLeft = false + // bitwise modulus + rq.head = (rq.head + 1) % len(rq.buffer) + rq.count-- + } else { + rq.resizeLocked() + } } + + rq.buffer[rq.tail] = elem + rq.tail = (rq.tail + 1) % len(rq.buffer) + rq.count++ return spaceLeft } -func (rb *RingQueue[T]) enqueueLocked(val T) bool { - rb.buffer[rb.tail] = val - rb.tail = (rb.tail + 1) % len(rb.buffer) +// ReversePeek returns the last element that was enqueued without removing it. +func (rq *RingQueue[T]) ReversePeek() T { + rq.mu.Lock() + defer rq.mu.Unlock() + return rq.buffer[(rq.tail-1+len(rq.buffer))%len(rq.buffer)] +} - if rb.tail == rb.head && len(rb.buffer) == rb.maxBufferSize { // We lost one element - rb.head = (rb.head + 1) % len(rb.buffer) - return false - } +// Peek returns the first element that was enqueued without removing it. +func (rq *RingQueue[T]) Peek() T { + rq.mu.Lock() + defer rq.mu.Unlock() + return rq.buffer[rq.head] +} - // We need to resize the buffer, we double the size and cap it to maxBufferSize - if rb.tail == rb.head { - newBuffer := make([]T, min(cap(rb.buffer)*2, rb.maxBufferSize)) - copy(newBuffer, rb.buffer[rb.head:]) - copy(newBuffer[len(rb.buffer)-rb.head:], rb.buffer[:rb.tail]) - rb.head = 0 - rb.tail = len(rb.buffer) - 1 - rb.buffer = newBuffer +// Enqueue adds one or multiple values to the buffer. returns false if the buffer is full. +func (rq *RingQueue[T]) Enqueue(vals ...T) bool { + rq.mu.Lock() + defer rq.mu.Unlock() + spaceLeft := true + for _, val := range vals { + spaceLeft = rq.enqueueLocked(val) } - return true + return spaceLeft } // Dequeue removes a value from the buffer. -func (rb *RingQueue[T]) Dequeue() T { - rb.mu.Lock() - defer rb.mu.Unlock() - - val := rb.buffer[rb.head] - rb.head = (rb.head + 1) % len(rb.buffer) - return val +func (rq *RingQueue[T]) Dequeue() T { + rq.mu.Lock() + defer rq.mu.Unlock() + + ret := rq.buffer[rq.head] + // bitwise modulus + rq.head = (rq.head + 1) % len(rq.buffer) + rq.count-- + return ret } // GetBuffer returns the current buffer and resets it. 
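
Aside (illustrative, not part of the patch): the reworked RingQueue grows from minSize toward maxSize and, once at maxSize, overwrites the oldest element while reporting the overflow through Enqueue's return value. A short walkthrough consistent with the tests added below:

    q := NewRingQueue[int](3, 5)  // starts at capacity 3, may grow up to 5

    q.Enqueue(1, 2, 3)            // fits; returns true
    ok := q.Enqueue(4, 5, 6)      // grows to 5, then drops the oldest element (1)
    fmt.Println(ok)               // false: at least one value was overwritten

    fmt.Println(q.ReversePeek())  // 6, the most recently enqueued value
    fmt.Println(q.Flush())        // [2 3 4 5 6]; the queue is reset
    fmt.Println(q.IsEmpty())      // true
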
-func (rb *RingQueue[T]) GetBuffer() []T { - rb.mu.Lock() - defer rb.mu.Unlock() - return rb.getBufferLocked() +func (rq *RingQueue[T]) GetBuffer() []T { + rq.mu.Lock() + defer rq.mu.Unlock() + return rq.getBufferLocked() } -func (rb *RingQueue[T]) getBufferLocked() []T { - prevBuf := rb.buffer - rb.buffer = rb.pool.Get().([]T) - rb.head = 0 - rb.tail = len(rb.buffer) - 1 +func (rq *RingQueue[T]) getBufferLocked() []T { + prevBuf := rq.buffer + rq.buffer = rq.pool.Get().([]T) + rq.head, rq.tail, rq.count = 0, 0, 0 return prevBuf } // Flush returns a copy of the buffer and resets it. -func (rb *RingQueue[T]) Flush() []T { - rb.mu.Lock() - head, tail := rb.head, rb.tail - buf := rb.getBufferLocked() - rb.mu.Unlock() +func (rq *RingQueue[T]) Flush() []T { + rq.mu.Lock() + head, count := rq.head, rq.count + buf := rq.getBufferLocked() + rq.mu.Unlock() - defer rb.ReleaseBuffer(buf) + defer rq.ReleaseBuffer(buf) - copyBuf := make([]T, 0, len(buf)) - for i := head; i != tail; i = (i + 1) % len(buf) { - copyBuf = append(copyBuf, buf[i]) + copyBuf := make([]T, count) + for i := 0; i < count; i++ { + copyBuf[i] = buf[(head+i)%len(buf)] } return copyBuf } // ReleaseBuffer returns the buffer to the pool. -func (rb *RingQueue[T]) ReleaseBuffer(buf []T) { - rb.pool.Put(buf[:cap(buf)]) // Make sure nobody reduced the length of the buffer +func (rq *RingQueue[T]) ReleaseBuffer(buf []T) { + rq.pool.Put(buf[:cap(buf)]) // Make sure nobody reduced the length of the buffer } // IsEmpty returns true if the buffer is empty. -func (rb *RingQueue[T]) IsEmpty() bool { - rb.mu.Lock() - defer rb.mu.Unlock() +func (rq *RingQueue[T]) IsEmpty() bool { + rq.mu.Lock() + defer rq.mu.Unlock() - return rb.head == rb.tail + return rq.count == 0 } // IsFull returns true if the buffer is full and cannot accept more elements. -func (rb *RingQueue[T]) IsFull() bool { - rb.mu.Lock() - defer rb.mu.Unlock() +func (rq *RingQueue[T]) IsFull() bool { + rq.mu.Lock() + defer rq.mu.Unlock() - return (rb.tail+1)%len(rb.buffer) == rb.head && len(rb.buffer) == rb.maxBufferSize + return len(rq.buffer) == rq.count && len(rq.buffer) == rq.maxBufferSize } diff --git a/internal/newtelemetry/internal/ringbuffer_test.go b/internal/newtelemetry/internal/ringbuffer_test.go new file mode 100644 index 0000000000..8e07d36fca --- /dev/null +++ b/internal/newtelemetry/internal/ringbuffer_test.go @@ -0,0 +1,155 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEnqueueSingleElement(t *testing.T) { + queue := NewRingQueue[int](3, 5) + success := queue.Enqueue(1) + assert.True(t, success) + assert.Equal(t, 1, queue.ReversePeek()) +} + +func TestEnqueueMultipleElements(t *testing.T) { + queue := NewRingQueue[int](3, 5) + success := queue.Enqueue(1, 2, 3) + assert.True(t, success) + assert.Equal(t, 3, queue.ReversePeek()) +} + +func TestEnqueueBeyondCapacity(t *testing.T) { + queue := NewRingQueue[int](3, 5) + success := queue.Enqueue(1, 2, 3, 4, 5, 6) + assert.False(t, success) + assert.Equal(t, 6, queue.ReversePeek()) +} + +func TestDequeueSingleElement(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1) + val := queue.Dequeue() + assert.Equal(t, 1, val) +} + +func TestDequeueMultipleElements(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1, 2, 3) + val1 := queue.Dequeue() + val2 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) +} + +func TestDequeueFromEmptyQueue(t *testing.T) { + queue := NewRingQueue[int](3, 5) + val := queue.Dequeue() + assert.Equal(t, 0, val) // Assuming zero value for int +} + +func TestGetBufferAndReset(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1, 2, 3) + buffer := queue.GetBuffer() + assert.Equal(t, []int{1, 2, 3}, buffer) + assert.True(t, queue.IsEmpty()) +} + +func TestFlushAndReset(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1, 2, 3) + buffer := queue.Flush() + assert.Equal(t, []int{1, 2, 3}, buffer) + assert.True(t, queue.IsEmpty()) +} + +func TestIsEmptyWhenQueueIsEmpty(t *testing.T) { + queue := NewRingQueue[int](3, 5) + assert.True(t, queue.IsEmpty()) +} + +func TestIsEmptyWhenQueueIsNotEmpty(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1) + assert.False(t, queue.IsEmpty()) +} + +func TestIsFullWhenQueueIsFull(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1, 2, 3, 4, 5) + assert.True(t, queue.IsFull()) +} + +func TestIsFullWhenQueueIsNotFull(t *testing.T) { + queue := NewRingQueue[int](3, 5) + queue.Enqueue(1, 2, 3) + assert.False(t, queue.IsFull()) +} + +func TestEnqueueToFullQueue(t *testing.T) { + queue := NewRingQueue[int](3, 3) + success := queue.Enqueue(1, 2, 3) + assert.True(t, success) + success = queue.Enqueue(4) + assert.False(t, success) + assert.Equal(t, 4, queue.ReversePeek()) +} + +func TestDequeueFromFullQueue(t *testing.T) { + queue := NewRingQueue[int](3, 3) + queue.Enqueue(1, 2, 3) + val := queue.Dequeue() + assert.Equal(t, 1, val) + assert.False(t, queue.IsFull()) +} + +func TestEnqueueAfterDequeue(t *testing.T) { + queue := NewRingQueue[int](3, 3) + queue.Enqueue(1, 2, 3) + queue.Dequeue() + success := queue.Enqueue(4) + assert.True(t, success) + assert.Equal(t, 4, queue.ReversePeek()) +} + +func TestDequeueAllElements(t *testing.T) { + queue := NewRingQueue[int](3, 3) + queue.Enqueue(1, 2, 3) + val1 := queue.Dequeue() + val2 := queue.Dequeue() + val3 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) + assert.Equal(t, 3, val3) + assert.True(t, queue.IsEmpty()) +} + +func TestEnqueueAndDequeueInterleaved(t *testing.T) { + queue := NewRingQueue[int](3, 3) + queue.Enqueue(1) + val1 := queue.Dequeue() + queue.Enqueue(2) + val2 := queue.Dequeue() + queue.Enqueue(3, 4) + val3 := queue.Dequeue() + assert.Equal(t, 1, val1) + assert.Equal(t, 2, val2) + assert.Equal(t, 3, val3) + assert.False(t, queue.IsEmpty()) +} + +func 
TestEnqueueWithResize(t *testing.T) { + queue := NewRingQueue[int](2, 4) + queue.Enqueue(1, 2) + assert.Equal(t, 2, len(queue.buffer)) + queue.Enqueue(3) + assert.Equal(t, 4, len(queue.buffer)) + assert.Equal(t, 3, queue.ReversePeek()) +} diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index b34f673a23..f23762fdc1 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -73,8 +73,10 @@ func (b *Body) UnmarshalJSON(bytes []byte) error { return err } - if err = json.Unmarshal(anyMap["debug"], &b.Debug); err != nil { - return err + if _, ok := anyMap["debug"]; ok { + if err = json.Unmarshal(anyMap["debug"], &b.Debug); err != nil { + return err + } } if err = json.Unmarshal(anyMap["application"], &b.Application); err != nil { diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 34b64759f0..36a5842969 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -173,7 +173,7 @@ func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request "DD-Agent-Install-Time": globalconfig.InstrumentationInstallTime(), "Datadog-Container-ID": internal.ContainerID(), "Datadog-Entity-ID": internal.EntityID(), - // TODO: Add support for Cloud provider/resource-type/resource-id headers in another PR and package + // TODO: add support for Cloud provider/resource-type/resource-id headers in another PR and package // Described here: https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/cf17b41a30fbf31d54e2cfbfc983875d58b02fe1/GeneratedDocumentation/ApiDocs/v2/overview.md#setting-the-serverless-telemetry-headers } { if val == "" { @@ -224,12 +224,12 @@ func (s *SumReaderCloser) Read(p []byte) (n int, err error) { // WriterStatusCodeError is an error that is returned when the writer receives an unexpected status code from the server. 
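
Aside (sketch only, not part of the patch): with the StatusCode field added to EndpointRequestResult, a caller of writer.Flush can distinguish per-endpoint HTTP failures from transport-level errors. Variable names below are illustrative:

    results, err := w.Flush(payload)
    for _, res := range results {
        if res.Error != nil {
            // Typically a *WriterStatusCodeError carrying the status line and response
            // body; res.StatusCode holds the numeric code when the request reached the server.
            fmt.Printf("endpoint failed: %v (status %d)\n", res.Error, res.StatusCode)
            continue
        }
        fmt.Printf("sent %d bytes in %s (status %d)\n",
            res.PayloadByteSize, res.CallDuration, res.StatusCode)
    }
    if err != nil {
        // Every endpoint failed: keep the payload around and retry on the next flush.
    }
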
type WriterStatusCodeError struct { - StatusCode string - Body string + Status string + Body string } func (w *WriterStatusCodeError) Error() string { - return fmt.Sprintf("unexpected status code: %q (received body: %q)", w.StatusCode, w.Body) + return fmt.Sprintf("unexpected status code: %q (received body: %q)", w.Status, w.Body) } func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, error) { @@ -257,8 +257,8 @@ func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, erro if response.StatusCode >= 300 || response.StatusCode < 200 { respBodyBytes, _ := io.ReadAll(response.Body) // maybe we can find an error reason in the response body results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ - StatusCode: response.Status, - Body: string(respBodyBytes), + Status: response.Status, + Body: string(respBodyBytes), }, StatusCode: response.StatusCode}) continue } @@ -266,6 +266,7 @@ func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, erro results = append(results, EndpointRequestResult{ PayloadByteSize: sumReaderCloser.n, CallDuration: time.Since(now), + StatusCode: response.StatusCode, }) // We succeeded, no need to try the other endpoints diff --git a/internal/newtelemetry/internal/writer_test.go b/internal/newtelemetry/internal/writer_test.go index df4d255cc1..02a2e9ea19 100644 --- a/internal/newtelemetry/internal/writer_test.go +++ b/internal/newtelemetry/internal/writer_test.go @@ -10,6 +10,7 @@ import ( "net/http/httptest" "net/url" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -138,9 +139,11 @@ func TestWriter_Flush_Failure(t *testing.T) { config.Endpoints = append(config.Endpoints, req) writer, _ := NewWriter(config) - bytesSent, err := writer.Flush(&payload) + results, err := writer.Flush(&payload) require.Error(t, err) - assert.Zero(t, bytesSent) + assert.Len(t, results, 1) + assert.Equal(t, http.StatusBadRequest, results[0].StatusCode) + assert.ErrorContains(t, err, `400 Bad Request`) assert.True(t, marshalJSONCalled) assert.True(t, payloadReceived) } @@ -191,10 +194,20 @@ func TestWriter_Flush_MultipleEndpoints(t *testing.T) { config.Endpoints = append(config.Endpoints, req2) writer, _ := NewWriter(config) - bytesSent, err := writer.Flush(&payload) - require.ErrorContains(t, err, `telemetry/writer: unexpected status code: "500 Internal Server Error" (received body: "")`) + results, err := writer.Flush(&payload) + assert.NoError(t, err) + + assert.Len(t, results, 2) + assert.Equal(t, http.StatusInternalServerError, results[0].StatusCode) + assert.ErrorContains(t, results[0].Error, `500 Internal Server Error`) + assert.Equal(t, time.Duration(0), results[0].CallDuration) + assert.Zero(t, results[0].PayloadByteSize) + + assert.Equal(t, http.StatusOK, results[1].StatusCode) + assert.InDelta(t, time.Duration(1), results[1].CallDuration, float64(time.Millisecond)) + assert.NotZero(t, results[1].PayloadByteSize) + assert.NoError(t, results[1].Error) - assert.NotZero(t, bytesSent) assert.Equal(t, 2, marshalJSONCalled) assert.True(t, payloadReceived2) } diff --git a/internal/newtelemetry/log/log.go b/internal/newtelemetry/log/log.go new file mode 100644 index 0000000000..332004bf73 --- /dev/null +++ b/internal/newtelemetry/log/log.go @@ -0,0 +1,47 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). 
+// Copyright 2025 Datadog, Inc. + +package log + +import ( + "fmt" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" +) + +func divideArgs(args ...any) ([]newtelemetry.LogOption, []any) { + if len(args) == 0 { + return nil, nil + } + + var options []newtelemetry.LogOption + var fmtArgs []any + for _, arg := range args { + if opt, ok := arg.(newtelemetry.LogOption); ok { + options = append(options, opt) + } else { + fmtArgs = append(fmtArgs, arg) + } + } + return options, fmtArgs +} + +// Debug sends a telemetry payload with a debug log message to the backend. +func Debug(format string, args ...any) { + options, fmtArgs := divideArgs(args...) + newtelemetry.Log(newtelemetry.LogDebug, fmt.Sprintf(format, fmtArgs...), options...) +} + +// Warn sends a telemetry payload with a warning log message to the backend. +func Warn(format string, args ...any) { + options, fmtArgs := divideArgs(args...) + newtelemetry.Log(newtelemetry.LogWarn, fmt.Sprintf(format, fmtArgs...), options...) +} + +// Error sends a telemetry payload with an error log message to the backend. +func Error(format string, args ...any) { + options, fmtArgs := divideArgs(args...) + newtelemetry.Log(newtelemetry.LogError, fmt.Sprintf(format, fmtArgs...), options...) +} diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 2b5954e2e2..364ed900bd 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -17,6 +17,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) +// metricKey is used as a key in the metrics store hash map. type metricKey struct { namespace Namespace kind transport.MetricType @@ -85,41 +86,46 @@ type metric struct { // Values set during Submit() newSubmit atomic.Bool value atomic.Uint64 // Actually a float64, but we need atomic operations - submitTime atomic.Int64 + submitTime atomic.Int64 // Unix timestamp } -func (c *metric) submit() { - c.newSubmit.Store(true) - c.submitTime.Store(time.Now().Unix()) +func (m *metric) Get() float64 { + return math.Float64frombits(m.value.Load()) } -func (c *metric) payload() transport.MetricData { - if submit := c.newSubmit.Swap(false); !submit { +func (m *metric) payload() transport.MetricData { + if submit := m.newSubmit.Swap(false); !submit { return transport.MetricData{} } var tags []string - if c.key.tags != "" { - tags = strings.Split(c.key.tags, ",") + if m.key.tags != "" { + tags = strings.Split(m.key.tags, ",") } data := transport.MetricData{ - Metric: c.key.name, - Namespace: c.key.namespace, + Metric: m.key.name, + Namespace: m.key.namespace, Tags: tags, - Type: c.key.kind, - Common: knownmetrics.IsCommonMetric(c.key.namespace, string(c.key.kind), c.key.name), + Type: m.key.kind, + Common: knownmetrics.IsCommonMetric(m.key.namespace, string(m.key.kind), m.key.name), Points: [][2]any{ - {c.submitTime.Load(), math.Float64frombits(c.value.Load())}, + {m.submitTime.Load(), math.Float64frombits(m.value.Load())}, }, } return data } -func (c *metric) add(value float64) { +type count struct { + metric +} + +func (c *count) Submit(value float64) { + c.newSubmit.Store(true) + c.submitTime.Store(time.Now().Unix()) for { - oldValue := math.Float64frombits(c.value.Load()) + oldValue := c.Get() newValue := oldValue + value if c.value.CompareAndSwap(math.Float64bits(oldValue), math.Float64bits(newValue)) { return @@ -127,22 +133,14 @@ func (c *metric) add(value float64) { } } -type count struct { - metric -} - -func (c *count) Submit(value float64) { - c.submit() - 
c.add(value) -} - type gauge struct { metric } -func (c *gauge) Submit(value float64) { - c.submit() - c.value.Store(math.Float64bits(value)) +func (g *gauge) Submit(value float64) { + g.newSubmit.Store(true) + g.submitTime.Store(time.Now().Unix()) + g.value.Store(math.Float64bits(value)) } type rate struct { @@ -150,18 +148,25 @@ type rate struct { intervalStart time.Time } -func (c *rate) Submit(value float64) { - c.submit() - c.add(value) +func (r *rate) Submit(value float64) { + r.newSubmit.Store(true) + r.submitTime.Store(time.Now().Unix()) + for { + oldValue := r.Get() + newValue := oldValue + value + if r.value.CompareAndSwap(math.Float64bits(oldValue), math.Float64bits(newValue)) { + return + } + } } -func (c *rate) payload() transport.MetricData { - payload := c.metric.payload() +func (r *rate) payload() transport.MetricData { + payload := r.metric.payload() if payload.Metric == "" { return payload } - payload.Interval = int64(time.Since(c.intervalStart).Seconds()) - payload.Points[0][1] = math.Float64frombits(c.value.Load()) / float64(payload.Interval) + payload.Interval = int64(time.Since(r.intervalStart).Seconds()) + payload.Points[0][1] = r.Get() / float64(payload.Interval) return payload } From f65461046d27341e4a7d44d6c51e58138979123d Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 16:02:45 +0100 Subject: [PATCH 33/61] added mock telemetry client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 8 +- internal/newtelemetry/client.go | 4 +- internal/newtelemetry/client_test.go | 24 +-- internal/newtelemetry/globalclient.go | 4 +- .../telemetrytest/telemetrytest.go | 201 ++++++++++++++++++ 5 files changed, 221 insertions(+), 20 deletions(-) create mode 100644 internal/newtelemetry/telemetrytest/telemetrytest.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 8a73c598dc..066e4d1f2e 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -107,9 +107,9 @@ type Client interface { // Flush closes the client and flushes any remaining data. Flush() - // appStart sends the telemetry necessary to signal that the app is starting. - appStart() + // AppStart sends the telemetry necessary to signal that the app is starting. + AppStart() - // appStop sends the telemetry necessary to signal that the app is stopping. - appStop() + // AppStop sends the telemetry necessary to signal that the app is stopping. 
+ AppStop() } diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 36d4ac5cfc..5361689199 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -308,13 +308,13 @@ func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, e c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) } -func (c *client) appStart() { +func (c *client) AppStart() { c.flushMapperMu.Lock() defer c.flushMapperMu.Unlock() c.flushMapper = mapper.NewAppStartedMapper(c.flushMapper) } -func (c *client) appStop() { +func (c *client) AppStop() { c.flushMapperMu.Lock() defer c.flushMapperMu.Unlock() c.flushMapper = mapper.NewAppClosingMapper(c.flushMapper) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index af568ccfbd..5eb9f45ec0 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -312,7 +312,7 @@ func TestClientFlush(t *testing.T) { { name: "app-started", when: func(c *client) { - c.appStart() + c.AppStart() }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -326,7 +326,7 @@ func TestClientFlush(t *testing.T) { { name: "app-started-with-product", when: func(c *client) { - c.appStart() + c.AppStart() c.ProductStarted("test-product") }, expect: func(t *testing.T, payloads []transport.Payload) { @@ -339,7 +339,7 @@ func TestClientFlush(t *testing.T) { { name: "app-started-with-configuration", when: func(c *client) { - c.appStart() + c.AppStart() c.AddAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { @@ -354,7 +354,7 @@ func TestClientFlush(t *testing.T) { { name: "app-started+integrations", when: func(c *client) { - c.appStart() + c.AppStart() c.MarkIntegrationAsLoaded(Integration{Name: "test-integration", Version: "1.0.0"}) }, expect: func(t *testing.T, payloads []transport.Payload) { @@ -380,7 +380,7 @@ func TestClientFlush(t *testing.T) { HeartbeatInterval: time.Nanosecond, }, when: func(c *client) { - c.appStart() + c.AppStart() }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -397,7 +397,7 @@ func TestClientFlush(t *testing.T) { { name: "app-stopped", when: func(c *client) { - c.appStop() + c.AppStop() }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -1028,7 +1028,7 @@ func TestClientEnd2End(t *testing.T) { { name: "app-start", when: func(c *client) { - c.appStart() + c.AppStart() }, expect: func(t *testing.T, bodies []transport.Body) { require.Len(t, bodies, 1) @@ -1038,7 +1038,7 @@ func TestClientEnd2End(t *testing.T) { { name: "app-stop", when: func(c *client) { - c.appStop() + c.AppStop() }, expect: func(t *testing.T, bodies []transport.Body) { require.Len(t, bodies, 1) @@ -1048,8 +1048,8 @@ func TestClientEnd2End(t *testing.T) { { name: "app-start+app-stop", when: func(c *client) { - c.appStart() - c.appStop() + c.AppStart() + c.AppStop() }, expect: func(t *testing.T, bodies []transport.Body) { require.Len(t, bodies, 2) @@ -1060,7 +1060,7 @@ func TestClientEnd2End(t *testing.T) { { name: "fail-agent-endpoint", when: func(c *client) { - c.appStart() + c.AppStart() }, roundtrip: func(_ *testing.T, req *http.Request) (*http.Response, error) { if strings.Contains(req.URL.Host, "localhost") { @@ -1077,7 +1077,7 @@ func TestClientEnd2End(t *testing.T) { { name: "fail-all-endpoint", 
when: func(c *client) { - c.appStart() + c.AppStart() }, roundtrip: func(_ *testing.T, _ *http.Request) (*http.Response, error) { return nil, errors.New("failed") diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 53cf41e724..b6542ca3fd 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -35,7 +35,7 @@ func StartApp(client Client) { globalClientRecorder.Replay(client) - client.appStart() + client.AppStart() } // SwapClient swaps the global client with the given client and Flush the old (*client). @@ -64,7 +64,7 @@ func StopApp() { } if client := globalClient.Swap(nil); client != nil && *client != nil { - (*client).appStop() + (*client).AppStop() (*client).Flush() (*client).Close() } diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go new file mode 100644 index 0000000000..ac6a592ca9 --- /dev/null +++ b/internal/newtelemetry/telemetrytest/telemetrytest.go @@ -0,0 +1,201 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2022 Datadog, Inc. + +// Package newtelemetrytest provides a mock implementation of the newtelemetry client for testing purposes +package newtelemetrytest + +import ( + "strings" + "sync" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + + "github.com/stretchr/testify/mock" +) + +// MockClient implements Client and is used for testing purposes outside the newtelemetry package, +// e.g. the tracer and profiler. +type MockClient struct { + mock.Mock + mu sync.Mutex + Started bool + Stopped bool + Configuration map[string]any + Logs map[newtelemetry.LogLevel]string + Integrations []string + Products map[newtelemetry.Namespace]bool + Metrics map[metricKey]*float64 +} + +type metricKey struct { + Namespace newtelemetry.Namespace + Name string + Tags string + Kind string +} + +func (m *MockClient) Close() error { + return nil +} + +func tagsString(tags map[string]string) string { + compiledTags := "" + for k, v := range tags { + compiledTags += k + ":" + v + "," + } + return strings.TrimSuffix(compiledTags, ",") +} + +type MockMetricHandle struct { + mock.Mock + mu sync.Mutex + submit func(ptr *float64, value float64) + value *float64 +} + +func (m *MockMetricHandle) Submit(value float64) { + m.On("Submit", value) + m.mu.Lock() + defer m.mu.Unlock() + m.submit(m.value, value) +} + +func (m *MockMetricHandle) Get() float64 { + m.On("Get") + m.mu.Lock() + defer m.mu.Unlock() + return *m.value +} + +func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { + m.mu.Lock() + defer m.mu.Unlock() + m.On("Count", namespace, name, tags) + key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "count"} + if _, ok := m.Metrics[key]; !ok { + init := 0.0 + m.Metrics[key] = &init + } + + return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { + *ptr += value + }} +} + +func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { + m.mu.Lock() + defer m.mu.Unlock() + m.On("Rate", namespace, name, tags) + key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "rate"} + if _, ok := m.Metrics[key]; !ok { + init := 0.0 + m.Metrics[key] = &init 
+ } + + return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { + *ptr += value + }} +} + +func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { + m.mu.Lock() + defer m.mu.Unlock() + m.On("Gauge", namespace, name, tags) + key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "gauge"} + if _, ok := m.Metrics[key]; !ok { + init := 0.0 + m.Metrics[key] = &init + } + + return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { + *ptr = value + }} +} + +func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { + m.mu.Lock() + defer m.mu.Unlock() + m.On("Distribution", namespace, name, tags) + key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "distribution"} + if _, ok := m.Metrics[key]; !ok { + init := 0.0 + m.Metrics[key] = &init + } + + return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { + *ptr = value + }} +} + +func (m *MockClient) Log(level newtelemetry.LogLevel, text string, options ...newtelemetry.LogOption) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("Log", level, text, options) + m.Logs[level] = text +} + +func (m *MockClient) ProductStarted(product newtelemetry.Namespace) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("ProductStarted", product) + m.Products[product] = true +} + +func (m *MockClient) ProductStopped(product newtelemetry.Namespace) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("ProductStopped", product) + m.Products[product] = false +} + +func (m *MockClient) ProductStartError(product newtelemetry.Namespace, err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("ProductStartError", product, err) + m.Products[product] = false +} + +func (m *MockClient) AddAppConfig(key string, value any, origin newtelemetry.Origin) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("AddAppConfig", key, value, origin) + m.Configuration[key] = value +} + +func (m *MockClient) AddBulkAppConfig(kvs map[string]any, origin newtelemetry.Origin) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("AddBulkAppConfig", kvs, origin) + for k, v := range kvs { + m.Configuration[k] = v + } +} + +func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { + m.mu.Lock() + defer m.mu.Unlock() + m.On("MarkIntegrationAsLoaded", integration) + m.Integrations = append(m.Integrations, integration.Name) +} + +func (m *MockClient) Flush() { + m.On("Flush") +} + +func (m *MockClient) AppStart() { + m.mu.Lock() + defer m.mu.Unlock() + m.On("AppStart") + m.Started = true +} + +func (m *MockClient) AppStop() { + m.mu.Lock() + defer m.mu.Unlock() + m.On("AppStop") + m.Stopped = true +} + +var _ newtelemetry.Client = (*MockClient)(nil) From 21d0f0116f57f169118e81c3807779effe55a2f0 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 16:08:24 +0100 Subject: [PATCH 34/61] add GlobalClient() function to mirror old package variable GlobalClient Signed-off-by: Eliott Bouhana --- internal/newtelemetry/globalclient.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index b6542ca3fd..e9275adfa6 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -25,6 +25,15 @@ var ( metricsHandleHotPointersMu sync.Mutex ) +// GlobalClient returns the global telemetry client. 
+func GlobalClient() Client { + client := globalClient.Load() + if client == nil { + return nil + } + return *client +} + // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). func StartApp(client Client) { if Disabled() || globalClient.Load() != nil { From 29fc678078304169abae949e76937360eea4eb5b Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 16:20:41 +0100 Subject: [PATCH 35/61] rename AddAppConfig to RegisterAppConfig and rename AddBulkConfig to RegisterAppConfigs Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 20 +++++++++++++++---- internal/newtelemetry/client.go | 12 +++++------ internal/newtelemetry/client_test.go | 8 ++++---- internal/newtelemetry/configuration.go | 10 +++++----- internal/newtelemetry/globalclient.go | 16 +++++++-------- .../telemetrytest/telemetrytest.go | 16 +++++++-------- 6 files changed, 44 insertions(+), 38 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 066e4d1f2e..cf7eba5b3c 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -62,6 +62,16 @@ type Integration struct { Error string } +// Configuration is a key-value pair that is used to configure the application. +type Configuration struct { + // Key is the key of the configuration. + Key string + // Value is the value of the configuration. Need to be json serializable. + Value any + // Origin is the source of the configuration change. + Origin Origin +} + // Client constitutes all the functions available concurrently for the telemetry users. All methods are thread-safe // This is an interface for easier testing but all functions will be mirrored at the package level to call // the global client. @@ -93,13 +103,13 @@ type Client interface { // ProductStartError declares that a product could not start because of the following error ProductStartError(product Namespace, err error) - // AddAppConfig adds a key value pair to the app configuration and send the change to telemetry + // RegisterAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. - AddAppConfig(key string, value any, origin Origin) + RegisterAppConfig(key string, value any, origin Origin) - // AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. + // RegisterAppConfigs adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. - AddBulkAppConfig(kvs map[string]any, origin Origin) + RegisterAppConfigs(kvs ...Configuration) // MarkIntegrationAsLoaded marks an integration as loaded in the telemetry MarkIntegrationAsLoaded(integration Integration) @@ -108,8 +118,10 @@ type Client interface { Flush() // AppStart sends the telemetry necessary to signal that the app is starting. + // Preferred use via StartApp package level function AppStart() // AppStop sends the telemetry necessary to signal that the app is stopping. 
+ // Preferred use via StopApp package level function AppStop() } diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 5361689199..2452fc94bb 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -178,13 +178,13 @@ func (c *client) ProductStartError(product Namespace, err error) { c.products.Add(product, false, err) } -func (c *client) AddAppConfig(key string, value any, origin Origin) { - c.configuration.Add(key, value, origin) +func (c *client) RegisterAppConfig(key string, value any, origin Origin) { + c.configuration.Add(Configuration{key, value, origin}) } -func (c *client) AddBulkAppConfig(kvs map[string]any, origin Origin) { - for key, value := range kvs { - c.configuration.Add(key, value, origin) +func (c *client) RegisterAppConfigs(kvs ...Configuration) { + for _, value := range kvs { + c.configuration.Add(value) } } @@ -324,5 +324,3 @@ func (c *client) Close() error { c.flushTicker.Stop() return nil } - -var _ Client = (*client)(nil) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 5eb9f45ec0..f4211c544f 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -111,7 +111,7 @@ func TestClientFlush(t *testing.T) { ExtendedHeartbeatInterval: time.Nanosecond, }, when: func(c *client) { - c.AddAppConfig("key", "value", OriginDefault) + c.RegisterAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -148,7 +148,7 @@ func TestClientFlush(t *testing.T) { { name: "configuration-default", when: func(c *client) { - c.AddAppConfig("key", "value", OriginDefault) + c.RegisterAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -163,7 +163,7 @@ func TestClientFlush(t *testing.T) { { name: "configuration-default", when: func(c *client) { - c.AddAppConfig("key", "value", OriginDefault) + c.RegisterAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -340,7 +340,7 @@ func TestClientFlush(t *testing.T) { name: "app-started-with-configuration", when: func(c *client) { c.AppStart() - c.AddAppConfig("key", "value", OriginDefault) + c.RegisterAppConfig("key", "value", OriginDefault) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index 1939bf055c..ecbb7e33d1 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -17,7 +17,7 @@ type configuration struct { seqID uint64 } -func (c *configuration) Add(key string, value any, origin Origin) { +func (c *configuration) Add(kv Configuration) { c.mu.Lock() defer c.mu.Unlock() @@ -25,10 +25,10 @@ func (c *configuration) Add(key string, value any, origin Origin) { c.config = make(map[string]transport.ConfKeyValue) } - c.config[key] = transport.ConfKeyValue{ - Name: key, - Value: value, - Origin: origin, + c.config[kv.Key] = transport.ConfKeyValue{ + Name: kv.Key, + Value: kv.Value, + Origin: kv.Origin, } } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index e9275adfa6..d7412ec0b9 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -172,21 +172,21 @@ func ProductStartError(product Namespace, err error) { }) } -// AddAppConfig adds a 
key value pair to the app configuration and send the change to telemetry +// RegisterAppConfig adds a key value pair to the app configuration and send the change to telemetry // value has to be json serializable and the origin is the source of the change. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func AddAppConfig(key string, value any, origin Origin) { +func RegisterAppConfig(key string, value any, origin Origin) { globalClientCall(func(client Client) { - client.AddAppConfig(key, value, origin) + client.RegisterAppConfig(key, value, origin) }) } -// AddBulkAppConfig adds a list of key value pairs to the app configuration and sends the change to telemetry. +// RegisterAppConfigs adds a list of key value pairs to the app configuration and sends the change to telemetry. // Same as AddAppConfig but for multiple values. If telemetry is disabled, it will do nothing. // If the telemetry client has not started yet, it will record the action and replay it once the client is started. -func AddBulkAppConfig(kvs map[string]any, origin Origin) { +func RegisterAppConfigs(kvs ...Configuration) { globalClientCall(func(client Client) { - client.AddBulkAppConfig(kvs, origin) + client.RegisterAppConfigs(kvs...) }) } @@ -201,9 +201,7 @@ func globalClientCall(fun func(client Client)) { client := globalClient.Load() if client == nil || *client == nil { - if !globalClientRecorder.Record(func(client Client) { - fun(client) - }) { + if !globalClientRecorder.Record(fun) { globalClientLogLossOnce.Do(func() { log.Debug("telemetry: global client recorder queue is full, dropping telemetry data, please start the telemetry client earlier to avoid data loss") }) diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go index ac6a592ca9..352262d0bb 100644 --- a/internal/newtelemetry/telemetrytest/telemetrytest.go +++ b/internal/newtelemetry/telemetrytest/telemetrytest.go @@ -22,7 +22,7 @@ type MockClient struct { mu sync.Mutex Started bool Stopped bool - Configuration map[string]any + Configuration []newtelemetry.Configuration Logs map[newtelemetry.LogLevel]string Integrations []string Products map[newtelemetry.Namespace]bool @@ -157,20 +157,18 @@ func (m *MockClient) ProductStartError(product newtelemetry.Namespace, err error m.Products[product] = false } -func (m *MockClient) AddAppConfig(key string, value any, origin newtelemetry.Origin) { +func (m *MockClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { m.mu.Lock() defer m.mu.Unlock() - m.On("AddAppConfig", key, value, origin) - m.Configuration[key] = value + m.On("RegisterAppConfig", key, value, origin) + m.Configuration = append(m.Configuration, newtelemetry.Configuration{Key: key, Value: value, Origin: origin}) } -func (m *MockClient) AddBulkAppConfig(kvs map[string]any, origin newtelemetry.Origin) { +func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { m.mu.Lock() defer m.mu.Unlock() - m.On("AddBulkAppConfig", kvs, origin) - for k, v := range kvs { - m.Configuration[k] = v - } + m.On("RegisterAppConfigs", kvs) + m.Configuration = append(m.Configuration, kvs...) 
} func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { From c7e492829e6bea8d25d09c826753b8e92002edfc Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 17:46:17 +0100 Subject: [PATCH 36/61] support config sanitizing and rename some stuff to facilitate the transition from the previous telemetry client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 34 ++++--- internal/newtelemetry/client_test.go | 40 ++++++++ internal/newtelemetry/configuration.go | 97 ++++++++++++++++++- .../telemetrytest/telemetrytest.go | 2 +- 4 files changed, 156 insertions(+), 17 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index cf7eba5b3c..9404fb4ed1 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -15,26 +15,32 @@ import ( // different products used by the same application type Namespace = transport.Namespace +// Goland is having a hard time with the following const block, it keeps deleting the type of the consts +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted const ( - NamespaceGeneral = transport.NamespaceGeneral - NamespaceTracers = transport.NamespaceTracers - NamespaceProfilers = transport.NamespaceProfilers - NamespaceAppSec = transport.NamespaceAppSec - NamespaceIAST = transport.NamespaceIAST - NamespaceCIVisibility = transport.NamespaceCIVisibility - NamespaceMLOps = transport.NamespaceMLOps - NamespaceRUM = transport.NamespaceRUM + NamespaceGeneral Namespace = transport.NamespaceGeneral + NamespaceTracers Namespace = transport.NamespaceTracers + NamespaceProfilers Namespace = transport.NamespaceProfilers + NamespaceAppSec Namespace = transport.NamespaceAppSec + NamespaceIAST Namespace = transport.NamespaceIAST + NamespaceCIVisibility Namespace = transport.NamespaceCIVisibility + NamespaceMLOps Namespace = transport.NamespaceMLOps + NamespaceRUM Namespace = transport.NamespaceRUM ) // Origin describes the source of a configuration change type Origin = transport.Origin +// Goland is having a hard time with the following const block, it keeps deleting the type of the consts +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted const ( - OriginDefault = transport.OriginDefault - OriginCode = transport.OriginCode - OriginDDConfig = transport.OriginDDConfig - OriginEnvVar = transport.OriginEnvVar - OriginRemoteConfig = transport.OriginRemoteConfig + OriginDefault Origin = transport.OriginDefault + OriginCode Origin = transport.OriginCode + OriginDDConfig Origin = transport.OriginDDConfig + OriginEnvVar Origin = transport.OriginEnvVar + OriginRemoteConfig Origin = transport.OriginRemoteConfig ) // MetricHandle can be used to submit different values for the same metric. @@ -65,7 +71,7 @@ type Integration struct { // Configuration is a key-value pair that is used to configure the application. type Configuration struct { // Key is the key of the configuration. - Key string + Name string // Value is the value of the configuration. Need to be json serializable. Value any // Origin is the source of the configuration change. 
diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index f4211c544f..4fd046558e 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -175,6 +175,46 @@ func TestClientFlush(t *testing.T) { assert.Equal(t, config.Configuration[0].Origin, OriginDefault) }, }, + { + name: "configuration-complex-values", + when: func(c *client) { + c.RegisterAppConfigs( + Configuration{Name: "key1", Value: []string{"value1", "value2"}, Origin: OriginDefault}, + Configuration{Name: "key2", Value: map[string]string{"key": "value", "key2": "value2"}, Origin: OriginCode}, + Configuration{Name: "key3", Value: []int{1, 2, 3}, Origin: OriginDDConfig}, + Configuration{Name: "key4", Value: struct { + A string + }{A: "1"}, Origin: OriginEnvVar}, + Configuration{Name: "key5", Value: map[int]struct{ X int }{1: {X: 1}}, Origin: OriginRemoteConfig}, + ) + }, + expect: func(t *testing.T, payloads []transport.Payload) { + payload := payloads[0] + require.IsType(t, transport.AppClientConfigurationChange{}, payload) + config := payload.(transport.AppClientConfigurationChange) + + slices.SortStableFunc(config.Configuration, func(a, b transport.ConfKeyValue) int { + return strings.Compare(a.Name, b.Name) + }) + + assert.Len(t, config.Configuration, 5) + assert.Equal(t, "key1", config.Configuration[0].Name) + assert.Equal(t, "value1,value2", config.Configuration[0].Value) + assert.Equal(t, OriginDefault, config.Configuration[0].Origin) + assert.Equal(t, "key2", config.Configuration[1].Name) + assert.Equal(t, "key:value,key2:value2", config.Configuration[1].Value) + assert.Equal(t, OriginCode, config.Configuration[1].Origin) + assert.Equal(t, "key3", config.Configuration[2].Name) + assert.Equal(t, "1,2,3", config.Configuration[2].Value) + assert.Equal(t, OriginDDConfig, config.Configuration[2].Origin) + assert.Equal(t, "key4", config.Configuration[3].Name) + assert.Equal(t, "{1}", config.Configuration[3].Value) + assert.Equal(t, OriginEnvVar, config.Configuration[3].Origin) + assert.Equal(t, "key5", config.Configuration[4].Name) + assert.Equal(t, "1:{1}", config.Configuration[4].Value) + assert.Equal(t, OriginRemoteConfig, config.Configuration[4].Origin) + }, + }, { name: "product-start", when: func(c *client) { diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index ecbb7e33d1..bdbd659ae9 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -6,6 +6,12 @@ package newtelemetry import ( + "encoding/json" + "fmt" + "math" + "reflect" + "slices" + "strings" "sync" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" @@ -25,8 +31,8 @@ func (c *configuration) Add(kv Configuration) { c.config = make(map[string]transport.ConfKeyValue) } - c.config[kv.Key] = transport.ConfKeyValue{ - Name: kv.Key, + c.config[kv.Name] = transport.ConfKeyValue{ + Name: kv.Name, Value: kv.Value, Origin: kv.Origin, } @@ -42,6 +48,10 @@ func (c *configuration) Payload() transport.Payload { configs := make([]transport.ConfKeyValue, len(c.config)) idx := 0 for _, conf := range c.config { + if conf.Origin == "" { + conf.Origin = transport.OriginDefault + } + conf.Value = sanitizeConfigValue(conf.Value) conf.SeqID = c.seqID configs[idx] = conf idx++ @@ -52,3 +62,86 @@ func (c *configuration) Payload() transport.Payload { Configuration: configs, } } + +// sanitizeConfigValue sanitizes the value of a configuration key to ensure it can be marshalled. 
+func sanitizeConfigValue(value any) any { + if value == nil { + return "" + } + + // Skip reflection for basic types + switch val := value.(type) { + case string, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: + return val + case float32: + if math.IsNaN(float64(val)) || math.IsInf(float64(val), 0) { + return "" + } + return val + case float64: + // https://github.com/golang/go/issues/59627 + if math.IsNaN(val) || math.IsInf(val, 0) { + return "" + } + return val + } + + valueOf := reflect.ValueOf(value) + + // Unwrap pointers and interfaces up to 10 levels deep. + for i := 0; i < 10; i++ { + if valueOf.Kind() == reflect.Ptr || valueOf.Kind() == reflect.Interface { + valueOf = valueOf.Elem() + } else { + break + } + } + + switch { + case valueOf.Kind() == reflect.Slice, valueOf.Kind() == reflect.Array: + var sb strings.Builder + for i := 0; i < valueOf.Len(); i++ { + if i > 0 { + sb.WriteString(",") + } + sb.WriteString(fmt.Sprintf("%v", valueOf.Index(i))) + } + return sb.String() + case valueOf.Kind() == reflect.Map: + kvPair := make([]struct { + key string + value string + }, valueOf.Len()) + + iter := valueOf.MapRange() + for i := 0; iter.Next(); i++ { + kvPair[i].key = fmt.Sprintf("%v", iter.Key().Interface()) + kvPair[i].value = fmt.Sprintf("%v", iter.Value().Interface()) + } + + slices.SortStableFunc(kvPair, func(a, b struct { + key string + value string + }) int { + return strings.Compare(a.key, b.key) + }) + + var sb strings.Builder + for _, k := range kvPair { + if sb.Len() > 0 { + sb.WriteString(",") + } + sb.WriteString(k.key) + sb.WriteString(":") + sb.WriteString(k.value) + } + + return sb.String() + } + + if _, ok := value.(json.Marshaler); ok { + return value + } + + return fmt.Sprintf("%v", value) +} diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go index 352262d0bb..3df04eca4e 100644 --- a/internal/newtelemetry/telemetrytest/telemetrytest.go +++ b/internal/newtelemetry/telemetrytest/telemetrytest.go @@ -161,7 +161,7 @@ func (m *MockClient) RegisterAppConfig(key string, value any, origin newtelemetr m.mu.Lock() defer m.mu.Unlock() m.On("RegisterAppConfig", key, value, origin) - m.Configuration = append(m.Configuration, newtelemetry.Configuration{Key: key, Value: value, Origin: origin}) + m.Configuration = append(m.Configuration, newtelemetry.Configuration{Name: key, Value: value, Origin: origin}) } func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { From 6ba3e843082100ffae5a4ffeb97647757689b9e6 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 18:46:46 +0100 Subject: [PATCH 37/61] switch tags from map[string]string to []string Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 12 +++++--- internal/newtelemetry/client.go | 18 ++++++------ internal/newtelemetry/client_test.go | 28 +++++++++---------- internal/newtelemetry/configuration.go | 6 ++-- internal/newtelemetry/distributions.go | 9 ++---- internal/newtelemetry/globalclient.go | 8 +++--- internal/newtelemetry/logger.go | 8 ++---- internal/newtelemetry/metrics.go | 9 ++---- .../telemetrytest/telemetrytest.go | 28 +++++++------------ 9 files changed, 54 insertions(+), 72 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 9404fb4ed1..56a2b7c72d 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -85,16 +85,20 @@ type Client interface { io.Closer // Count creates a new metric handle for the given 
parameters that can be used to submit values. - Count(namespace Namespace, name string, tags map[string]string) MetricHandle + // Tags cannot contain commas. + Count(namespace Namespace, name string, tags []string) MetricHandle // Rate creates a new metric handle for the given parameters that can be used to submit values. - Rate(namespace Namespace, name string, tags map[string]string) MetricHandle + // Tags cannot contain commas. + Rate(namespace Namespace, name string, tags []string) MetricHandle // Gauge creates a new metric handle for the given parameters that can be used to submit values. - Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle + // Tags cannot contain commas. + Gauge(namespace Namespace, name string, tags []string) MetricHandle // Distribution creates a new metric handle for the given parameters that can be used to submit values. - Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle + // Tags cannot contain commas. + Distribution(namespace Namespace, name string, tags []string) MetricHandle // Log sends a telemetry log at the desired level with the given text and options. // Options include sending key-value pairs as tags, and a stack trace frozen from inside the Log function. diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 2452fc94bb..ca2abd6110 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -138,28 +138,28 @@ func (noopMetricHandle) Get() float64 { return 0 } -func (c *client) Count(namespace Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Count(namespace Namespace, name string, tags []string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} } return c.metrics.LoadOrStore(namespace, transport.CountMetric, name, tags) } -func (c *client) Rate(namespace Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Rate(namespace Namespace, name string, tags []string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} } return c.metrics.LoadOrStore(namespace, transport.RateMetric, name, tags) } -func (c *client) Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Gauge(namespace Namespace, name string, tags []string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} } return c.metrics.LoadOrStore(namespace, transport.GaugeMetric, name, tags) } -func (c *client) Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle { +func (c *client) Distribution(namespace Namespace, name string, tags []string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} } @@ -280,9 +280,9 @@ func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, e } for i, result := range results { - c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", map[string]string{"endpoint": indexToEndpoint(i)}).Submit(1) + c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", []string{"endpoint:" + indexToEndpoint(i)}).Submit(1) if result.StatusCode != 0 { - c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", map[string]string{"endpoint": indexToEndpoint(i), "status_code": strconv.Itoa(result.StatusCode)}).Submit(1) + c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", []string{"endpoint:" + indexToEndpoint(i), "status_code:" + strconv.Itoa(result.StatusCode)}).Submit(1) } if result.Error != 
nil { @@ -294,7 +294,7 @@ func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, e if errors.As(result.Error, &writerStatusCodeError) { typ = "status_code" } - c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", map[string]string{"endpoint": indexToEndpoint(i), "type": typ}).Submit(1) + c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", []string{"endpoint:" + indexToEndpoint(i), "type:" + typ}).Submit(1) } } @@ -304,8 +304,8 @@ func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, e successfulCall := results[len(results)-1] endpoint := indexToEndpoint(len(results) - 1) - c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.PayloadByteSize)) - c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", map[string]string{"endpoint": endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", []string{"endpoint:" + endpoint}).Submit(float64(successfulCall.PayloadByteSize)) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", []string{"endpoint:" + endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) } func (c *client) AppStart() { diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 4fd046558e..59fc13dfe8 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -570,7 +570,7 @@ func TestClientFlush(t *testing.T) { { name: "single-log-with-tag", when: func(c *client) { - c.Log(LogError, "test", WithTags(map[string]string{"key": "value"})) + c.Log(LogError, "test", WithTags([]string{"key:value"})) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -585,7 +585,7 @@ func TestClientFlush(t *testing.T) { { name: "single-log-with-tags", when: func(c *client) { - c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) + c.Log(LogError, "test", WithTags([]string{"key:value", "key2:value2"})) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -602,7 +602,7 @@ func TestClientFlush(t *testing.T) { { name: "single-log-with-tags-and-without", when: func(c *client) { - c.Log(LogError, "test", WithTags(map[string]string{"key": "value", "key2": "value2"})) + c.Log(LogError, "test", WithTags([]string{"key:value", "key2:value2"})) c.Log(LogError, "test") }, expect: func(t *testing.T, payloads []transport.Payload) { @@ -646,7 +646,7 @@ func TestClientFlush(t *testing.T) { { name: "single-log-with-stacktrace-and-tags", when: func(c *client) { - c.Log(LogError, "test", WithStacktrace(), WithTags(map[string]string{"key": "value", "key2": "value2"})) + c.Log(LogError, "test", WithStacktrace(), WithTags([]string{"key:value", "key2:value2"})) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -762,8 +762,8 @@ func TestClientFlush(t *testing.T) { { name: "multiple-count-by-tags", when: func(c *client) { - c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) - c.Count(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + c.Count(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Count(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -832,8 +832,8 @@ func TestClientFlush(t 
*testing.T) { { name: "multiple-gauge-by-tags", when: func(c *client) { - c.Gauge(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) - c.Gauge(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + c.Gauge(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Gauge(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -914,8 +914,8 @@ func TestClientFlush(t *testing.T) { { name: "multiple-distribution-by-tags", when: func(c *client) { - c.Distribution(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) - c.Distribution(NamespaceTracers, "init_time", map[string]string{"test": "2"}).Submit(2) + c.Distribution(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Distribution(NamespaceTracers, "init_time", []string{"test:2"}).Submit(2) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] @@ -1230,7 +1230,7 @@ func BenchmarkLogs(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - c.Log(LogWarn, "this is supposed to be a WARN log of representative length", WithTags(map[string]string{"key": strconv.Itoa(i % 10)})) + c.Log(LogWarn, "this is supposed to be a WARN log of representative length", WithTags([]string{"key:" + strconv.Itoa(i%10)})) } }) @@ -1289,7 +1289,7 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { defer wg.Done() for j := 0; j < nbDifferentLogs; j++ { for k := 0; k < nbSameLogs; k++ { - c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length"+strconv.Itoa(i), WithTags(map[string]string{"key": strconv.Itoa(j)})) + c.Log(LogDebug, "this is supposed to be a DEBUG log of representative length"+strconv.Itoa(i), WithTags([]string{"key:" + strconv.Itoa(j)})) } } }() @@ -1465,7 +1465,7 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { defer wg.Done() for j := 0; j < nbDifferentMetrics; j++ { for k := 0; k < nbSameMetric; k++ { - c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}).Submit(1) + c.Count(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) } } }() @@ -1488,7 +1488,7 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { for x := 0; x < b.N; x++ { var wg sync.WaitGroup - handle := c.Count(NamespaceTracers, "init_time", map[string]string{"test": "1"}) + handle := c.Count(NamespaceTracers, "init_time", []string{"test:1"}) for i := 0; i < nbGoroutines; i++ { wg.Add(1) go func() { diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index bdbd659ae9..3cb531744f 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -51,7 +51,7 @@ func (c *configuration) Payload() transport.Payload { if conf.Origin == "" { conf.Origin = transport.OriginDefault } - conf.Value = sanitizeConfigValue(conf.Value) + conf.Value = SanitizeConfigValue(conf.Value) conf.SeqID = c.seqID configs[idx] = conf idx++ @@ -63,8 +63,8 @@ func (c *configuration) Payload() transport.Payload { } } -// sanitizeConfigValue sanitizes the value of a configuration key to ensure it can be marshalled. -func sanitizeConfigValue(value any) any { +// SanitizeConfigValue sanitizes the value of a configuration key to ensure it can be marshalled. 
+func SanitizeConfigValue(value any) any { if value == nil { return "" } diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 1c1d7a5a6e..4397229142 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -29,18 +29,13 @@ type distributions struct { } // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. -func (d *distributions) LoadOrStore(namespace Namespace, name string, tags map[string]string) MetricHandle { +func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []string) MetricHandle { if !knownmetrics.IsKnownMetric(namespace, "distribution", name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ "The metric will still be sent.", name) } - compiledTags := "" - for k, v := range tags { - compiledTags += k + ":" + v + "," - } - - key := distributionKey{namespace: namespace, name: name, tags: strings.TrimSuffix(compiledTags, ",")} + key := distributionKey{namespace: namespace, name: name, tags: strings.Join(tags, ",")} handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](d.queueSize.Min, d.queueSize.Max)}) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index d7412ec0b9..cee5435e2e 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -90,7 +90,7 @@ func Disabled() bool { // Count creates a new metric handle for the given parameters that can be used to submit values. // Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Count(namespace Namespace, name string, tags map[string]string) MetricHandle { +func Count(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Count(namespace, name, tags) }) @@ -99,7 +99,7 @@ func Count(namespace Namespace, name string, tags map[string]string) MetricHandl // Rate creates a new metric handle for the given parameters that can be used to submit values. // Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Rate(namespace Namespace, name string, tags map[string]string) MetricHandle { +func Rate(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Rate(namespace, name, tags) }) @@ -108,7 +108,7 @@ func Rate(namespace Namespace, name string, tags map[string]string) MetricHandle // Gauge creates a new metric handle for the given parameters that can be used to submit values. // Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. 
-func Gauge(namespace Namespace, name string, tags map[string]string) MetricHandle { +func Gauge(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Gauge(namespace, name, tags) }) @@ -117,7 +117,7 @@ func Gauge(namespace Namespace, name string, tags map[string]string) MetricHandl // Distribution creates a new metric handle for the given parameters that can be used to submit values. // Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. -func Distribution(namespace Namespace, name string, tags map[string]string) MetricHandle { +func Distribution(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(func(client Client) MetricHandle { return client.Distribution(namespace, name, tags) }) diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index 7a13a50eed..79e0eef41e 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -19,12 +19,8 @@ type LogOption func(key *loggerKey, value *loggerValue) // WithTags returns a LogOption that sets the tags for the telemetry log message. Tags are key-value pairs that are then // serialized into a simple "key:value,key2:value2" format. No quoting or escaping is performed. -func WithTags(tags map[string]string) LogOption { - compiledTags := "" - for k, v := range tags { - compiledTags += k + ":" + v + "," - } - compiledTags = strings.TrimSuffix(compiledTags, ",") +func WithTags(tags []string) LogOption { + compiledTags := strings.Join(tags, ",") return func(key *loggerKey, _ *loggerValue) { if key == nil { return diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 364ed900bd..37d453cd7f 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -37,19 +37,14 @@ type metrics struct { } // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. -func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags map[string]string) MetricHandle { +func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags []string) MetricHandle { if !knownmetrics.IsKnownMetric(namespace, string(kind), name) { log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ "The metric will still be sent.", name) } - compiledTags := "" - for k, v := range tags { - compiledTags += k + ":" + v + "," - } - var ( - key = metricKey{namespace: namespace, kind: kind, name: name, tags: strings.TrimSuffix(compiledTags, ",")} + key = metricKey{namespace: namespace, kind: kind, name: name, tags: strings.Join(tags, ",")} handle MetricHandle ) switch kind { diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go index 3df04eca4e..2a0b5b5dbe 100644 --- a/internal/newtelemetry/telemetrytest/telemetrytest.go +++ b/internal/newtelemetry/telemetrytest/telemetrytest.go @@ -3,8 +3,8 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2022 Datadog, Inc. 
-// Package newtelemetrytest provides a mock implementation of the newtelemetry client for testing purposes -package newtelemetrytest +// Package telemetrytest provides a mock implementation of the newtelemetry client for testing purposes +package telemetrytest import ( "strings" @@ -40,14 +40,6 @@ func (m *MockClient) Close() error { return nil } -func tagsString(tags map[string]string) string { - compiledTags := "" - for k, v := range tags { - compiledTags += k + ":" + v + "," - } - return strings.TrimSuffix(compiledTags, ",") -} - type MockMetricHandle struct { mock.Mock mu sync.Mutex @@ -69,11 +61,11 @@ func (m *MockMetricHandle) Get() float64 { return *m.value } -func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { +func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() m.On("Count", namespace, name, tags) - key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "count"} + key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "count"} if _, ok := m.Metrics[key]; !ok { init := 0.0 m.Metrics[key] = &init @@ -84,11 +76,11 @@ func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags m }} } -func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { +func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() m.On("Rate", namespace, name, tags) - key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "rate"} + key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "rate"} if _, ok := m.Metrics[key]; !ok { init := 0.0 m.Metrics[key] = &init @@ -99,11 +91,11 @@ func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags ma }} } -func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { +func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() m.On("Gauge", namespace, name, tags) - key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "gauge"} + key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "gauge"} if _, ok := m.Metrics[key]; !ok { init := 0.0 m.Metrics[key] = &init @@ -114,11 +106,11 @@ func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags m }} } -func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags map[string]string) newtelemetry.MetricHandle { +func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() m.On("Distribution", namespace, name, tags) - key := metricKey{Namespace: namespace, Name: name, Tags: tagsString(tags), Kind: "distribution"} + key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "distribution"} if _, ok := m.Metrics[key]; !ok { init := 0.0 m.Metrics[key] = &init From 9cf11026a9bc36b5552b26cb2386bf752060b348 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 30 Jan 2025 20:58:52 +0100 Subject: [PATCH 38/61] fix misc Signed-off-by: Eliott Bouhana --- 
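Usage sketch (editorial note, not part of this commit): a minimal, hypothetical example of the package-level API as it stands after the preceding patches, assuming it lives inside the dd-trace-go module (these packages are internal); the package name, example function, metric name, tags, and configuration values are illustrative only.

package telemetryexample

import (
	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry"
	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/log"
)

func example() {
	// Tags are now plain "key:value" strings and must not contain commas.
	newtelemetry.Count(newtelemetry.NamespaceTracers, "init_time", []string{"test:1"}).Submit(1)

	// Configuration entries carry their own Origin since the RegisterAppConfigs rename.
	newtelemetry.RegisterAppConfigs(
		newtelemetry.Configuration{Name: "key1", Value: []string{"value1", "value2"}, Origin: newtelemetry.OriginDefault},
	)

	// LogOption values ride along with the format arguments and are split out by divideArgs.
	log.Warn("unexpected telemetry state: %d", 42, newtelemetry.WithTags([]string{"key:value"}))
}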
internal/newtelemetry/globalclient.go | 19 ++++++++++ .../telemetrytest/telemetrytest.go | 38 ++++++++++--------- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index cee5435e2e..6f3c2734cc 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -190,6 +190,24 @@ func RegisterAppConfigs(kvs ...Configuration) { }) } +// MarkIntegrationAsLoaded marks an integration as loaded in the telemetry. If telemetry is disabled +// or the client has not started yet it will record the action and replay it once the client is started. +func MarkIntegrationAsLoaded(integration Integration) { + globalClientCall(func(client Client) { + client.MarkIntegrationAsLoaded(integration) + }) +} + +// LoadIntegration marks an integration as loaded in the telemetry client. If telemetry is disabled, it will do nothing. +// If the telemetry client has not started yet, it will record the action and replay it once the client is started. +func LoadIntegration(integration string) { + globalClientCall(func(client Client) { + client.MarkIntegrationAsLoaded(Integration{ + Name: integration, + }) + }) +} + var globalClientLogLossOnce sync.Once // globalClientCall takes a function that takes a Client and calls it with the global client if it exists. @@ -243,6 +261,7 @@ func (t *metricsHotPointer) Submit(value float64) { log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss") }) } + return } (*inner).Submit(value) diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go index 2a0b5b5dbe..e003ebebc8 100644 --- a/internal/newtelemetry/telemetrytest/telemetrytest.go +++ b/internal/newtelemetry/telemetrytest/telemetrytest.go @@ -37,7 +37,8 @@ type metricKey struct { } func (m *MockClient) Close() error { - return nil + m.On("Close").Return() + return m.Called().Error(0) } type MockMetricHandle struct { @@ -48,14 +49,14 @@ type MockMetricHandle struct { } func (m *MockMetricHandle) Submit(value float64) { - m.On("Submit", value) + m.On("Submit", value).Return() m.mu.Lock() defer m.mu.Unlock() m.submit(m.value, value) } func (m *MockMetricHandle) Get() float64 { - m.On("Get") + m.On("Get").Return() m.mu.Lock() defer m.mu.Unlock() return *m.value @@ -64,7 +65,7 @@ func (m *MockMetricHandle) Get() float64 { func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() - m.On("Count", namespace, name, tags) + m.On("Count", namespace, name, tags).Return() key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "count"} if _, ok := m.Metrics[key]; !ok { init := 0.0 @@ -79,7 +80,7 @@ func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags [ func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() - m.On("Rate", namespace, name, tags) + m.On("Rate", namespace, name, tags).Return() key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "rate"} if _, ok := m.Metrics[key]; !ok { init := 0.0 @@ -94,7 +95,7 @@ func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags [] func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name 
string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() - m.On("Gauge", namespace, name, tags) + m.On("Gauge", namespace, name, tags).Return() key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "gauge"} if _, ok := m.Metrics[key]; !ok { init := 0.0 @@ -109,7 +110,7 @@ func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags [ func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { m.mu.Lock() defer m.mu.Unlock() - m.On("Distribution", namespace, name, tags) + m.On("Distribution", namespace, name, tags).Return() key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "distribution"} if _, ok := m.Metrics[key]; !ok { init := 0.0 @@ -124,67 +125,70 @@ func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, func (m *MockClient) Log(level newtelemetry.LogLevel, text string, options ...newtelemetry.LogOption) { m.mu.Lock() defer m.mu.Unlock() - m.On("Log", level, text, options) + m.On("Log", level, text, options).Return() m.Logs[level] = text } func (m *MockClient) ProductStarted(product newtelemetry.Namespace) { m.mu.Lock() defer m.mu.Unlock() - m.On("ProductStarted", product) + m.On("ProductStarted", product).Return() m.Products[product] = true } func (m *MockClient) ProductStopped(product newtelemetry.Namespace) { m.mu.Lock() defer m.mu.Unlock() - m.On("ProductStopped", product) + m.On("ProductStopped", product).Return() m.Products[product] = false } func (m *MockClient) ProductStartError(product newtelemetry.Namespace, err error) { m.mu.Lock() defer m.mu.Unlock() - m.On("ProductStartError", product, err) + m.On("ProductStartError", product, err).Return() m.Products[product] = false } func (m *MockClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { m.mu.Lock() defer m.mu.Unlock() - m.On("RegisterAppConfig", key, value, origin) + m.On("RegisterAppConfig", key, value, origin).Return() + m.Called(key, value, origin) m.Configuration = append(m.Configuration, newtelemetry.Configuration{Name: key, Value: value, Origin: origin}) } func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { m.mu.Lock() defer m.mu.Unlock() - m.On("RegisterAppConfigs", kvs) + m.On("RegisterAppConfigs", kvs).Return() + m.Called(kvs) m.Configuration = append(m.Configuration, kvs...) 
} func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { m.mu.Lock() defer m.mu.Unlock() - m.On("MarkIntegrationAsLoaded", integration) + m.On("MarkIntegrationAsLoaded", integration).Return() + m.Called(integration) m.Integrations = append(m.Integrations, integration.Name) } func (m *MockClient) Flush() { - m.On("Flush") + m.On("Flush").Return() } func (m *MockClient) AppStart() { m.mu.Lock() defer m.mu.Unlock() - m.On("AppStart") + m.On("AppStart").Return() m.Started = true } func (m *MockClient) AppStop() { m.mu.Lock() defer m.mu.Unlock() - m.On("AppStop") + m.On("AppStop").Return() m.Stopped = true } From cfe990a29e8dd8826b107d73c559c76dd5f09d3b Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 31 Jan 2025 15:21:01 +0100 Subject: [PATCH 39/61] rework telemetrytest to divide the mock client and the recorder client Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 2 +- internal/newtelemetry/globalclient.go | 12 +- internal/newtelemetry/telemetrytest/mock.go | 93 ++++++++ internal/newtelemetry/telemetrytest/record.go | 198 ++++++++++++++++++ .../telemetrytest/telemetrytest.go | 195 ----------------- 5 files changed, 299 insertions(+), 201 deletions(-) create mode 100644 internal/newtelemetry/telemetrytest/mock.go create mode 100644 internal/newtelemetry/telemetrytest/record.go delete mode 100644 internal/newtelemetry/telemetrytest/telemetrytest.go diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 56a2b7c72d..2ea5677334 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -48,7 +48,7 @@ const ( // This can also be used ephemerally to submit a single metric value like this: // // ```go -// telemetry.Metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) +// telemetry.metric(telemetry.Appsec, "my-count", map[string]string{"tag1": "true", "tag2": "1.0"}).Submit(1.0) // ``` type MetricHandle interface { // Submit submits a value to the metric handle. diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 6f3c2734cc..5dd16f01b2 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -36,7 +36,7 @@ func GlobalClient() Client { // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). func StartApp(client Client) { - if Disabled() || globalClient.Load() != nil { + if Disabled() || GlobalClient() != nil { return } @@ -55,8 +55,10 @@ func SwapClient(client Client) { if oldClient := globalClient.Swap(&client); oldClient != nil && *oldClient != nil { (*oldClient).Close() + } - // Swap all metrics hot pointers to the actual MetricHandle + if client != nil { + // Swap all metrics hot pointers to the new MetricHandle metricsHandleHotPointersMu.Lock() defer metricsHandleHotPointersMu.Unlock() for i := range metricsHandleHotPointers { @@ -68,7 +70,7 @@ func SwapClient(client Client) { // StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). 
func StopApp() { - if Disabled() || globalClient.Load() == nil { + if Disabled() || GlobalClient() == nil { return } @@ -79,12 +81,12 @@ func StopApp() { } } -var telemetryClientDisabled = globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) +var telemetryClientEnabled = globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) // Disabled returns whether instrumentation telemetry is disabled // according to the DD_INSTRUMENTATION_TELEMETRY_ENABLED env var func Disabled() bool { - return telemetryClientDisabled + return !telemetryClientEnabled } // Count creates a new metric handle for the given parameters that can be used to submit values. diff --git a/internal/newtelemetry/telemetrytest/mock.go b/internal/newtelemetry/telemetrytest/mock.go new file mode 100644 index 0000000000..fdf9855084 --- /dev/null +++ b/internal/newtelemetry/telemetrytest/mock.go @@ -0,0 +1,93 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +// Package telemetrytest provides a mock implementation of the telemetry client for testing purposes +package telemetrytest + +import ( + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + + "github.com/stretchr/testify/mock" +) + +// MockClient implements Client and is used for testing purposes outside the telemetry package, +// e.g. the tracer and profiler. +type MockClient struct { + mock.Mock +} + +func (m *MockClient) Close() error { + return nil +} + +type MockMetricHandle struct { + mock.Mock +} + +func (m *MockMetricHandle) Submit(value float64) { + m.Called(value) +} + +func (m *MockMetricHandle) Get() float64 { + return m.Called().Get(0).(float64) +} + +func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return m.Called(namespace, name, tags).Get(0).(newtelemetry.MetricHandle) +} + +func (m *MockClient) Log(level newtelemetry.LogLevel, text string, options ...newtelemetry.LogOption) { + m.Called(level, text, options) +} + +func (m *MockClient) ProductStarted(product newtelemetry.Namespace) { + m.Called(product) +} + +func (m *MockClient) ProductStopped(product newtelemetry.Namespace) { + m.Called(product) +} + +func (m *MockClient) ProductStartError(product newtelemetry.Namespace, err error) { + m.Called(product, err) +} + +func (m *MockClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { + m.Called(key, value, origin) +} + +func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { + m.Called(kvs) +} + +func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { + m.Called(integration) +} + +func (m *MockClient) Flush() { + m.Called() +} + +func (m *MockClient) AppStart() { + m.Called() +} + +func (m *MockClient) 
AppStop() { + m.Called() +} + +var _ newtelemetry.Client = (*MockClient)(nil) diff --git a/internal/newtelemetry/telemetrytest/record.go b/internal/newtelemetry/telemetrytest/record.go new file mode 100644 index 0000000000..e3b751e47a --- /dev/null +++ b/internal/newtelemetry/telemetrytest/record.go @@ -0,0 +1,198 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package telemetrytest + +import ( + "reflect" + "strings" + "sync" + "testing" + "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" +) + +type MetricKey struct { + Namespace newtelemetry.Namespace + Name string + Tags string + Kind string +} + +type RecordClient struct { + mu sync.Mutex + Started bool + Stopped bool + Configuration []newtelemetry.Configuration + Logs map[newtelemetry.LogLevel]string + Integrations []newtelemetry.Integration + Products map[newtelemetry.Namespace]bool + Metrics map[MetricKey]*RecordMetricHandle +} + +func (r *RecordClient) Close() error { + return nil +} + +type RecordMetricHandle struct { + mu sync.Mutex + count float64 + rate float64 + rateStart time.Time + gauge float64 + distrib []float64 + + submit func(handle *RecordMetricHandle, value float64) + get func(handle *RecordMetricHandle) float64 +} + +func (m *RecordMetricHandle) Submit(value float64) { + m.mu.Lock() + defer m.mu.Unlock() + m.submit(m, value) +} + +func (m *RecordMetricHandle) Get() float64 { + m.mu.Lock() + defer m.mu.Unlock() + return m.get(m) +} + +func (r *RecordClient) metric(kind string, namespace newtelemetry.Namespace, name string, tags []string, submit func(handle *RecordMetricHandle, value float64), get func(handle *RecordMetricHandle) float64) *RecordMetricHandle { + r.mu.Lock() + defer r.mu.Unlock() + if r.Metrics == nil { + r.Metrics = make(map[MetricKey]*RecordMetricHandle) + } + + key := MetricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: kind} + if _, ok := r.Metrics[key]; !ok { + r.Metrics[key] = &RecordMetricHandle{submit: submit, get: get} + } + return r.Metrics[key] +} + +func (r *RecordClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return r.metric("count", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.count += value + }, func(handle *RecordMetricHandle) float64 { + return handle.count + }) +} + +func (r *RecordClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + handle := r.metric("rate", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.count += value + handle.rate = float64(handle.count) / time.Since(handle.rateStart).Seconds() + }, func(handle *RecordMetricHandle) float64 { + return handle.rate + }) + + handle.rateStart = time.Now() + return handle +} + +func (r *RecordClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return r.metric("gauge", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle.gauge = value + }, func(handle *RecordMetricHandle) float64 { + return handle.gauge + }) +} + +func (r *RecordClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { + return r.metric("distribution", namespace, name, tags, func(handle *RecordMetricHandle, value 
float64) { + handle.distrib = append(handle.distrib, value) + }, func(handle *RecordMetricHandle) float64 { + var sum float64 + for _, v := range handle.distrib { + sum += v + } + return sum + }) +} + +func (r *RecordClient) Log(level newtelemetry.LogLevel, text string, _ ...newtelemetry.LogOption) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Logs == nil { + r.Logs = make(map[newtelemetry.LogLevel]string) + } + + r.Logs[level] = text +} + +func (r *RecordClient) ProductStarted(product newtelemetry.Namespace) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = true +} + +func (r *RecordClient) ProductStopped(product newtelemetry.Namespace) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = false +} + +func (r *RecordClient) ProductStartError(product newtelemetry.Namespace, _ error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.Products == nil { + r.Products = make(map[newtelemetry.Namespace]bool) + } + + r.Products[product] = false +} + +func (r *RecordClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { + r.mu.Lock() + defer r.mu.Unlock() + r.Configuration = append(r.Configuration, newtelemetry.Configuration{Name: key, Value: value, Origin: origin}) +} + +func (r *RecordClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { + r.mu.Lock() + defer r.mu.Unlock() + r.Configuration = append(r.Configuration, kvs...) +} + +func (r *RecordClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { + r.mu.Lock() + defer r.mu.Unlock() + r.Integrations = append(r.Integrations, integration) +} + +func (r *RecordClient) Flush() {} + +func (r *RecordClient) AppStart() { + r.mu.Lock() + defer r.mu.Unlock() + r.Started = true +} + +func (r *RecordClient) AppStop() { + r.mu.Lock() + defer r.mu.Unlock() + r.Stopped = true +} + +func CheckConfig(t *testing.T, cfgs []newtelemetry.Configuration, key string, value any) { + for _, c := range cfgs { + if c.Name == key && reflect.DeepEqual(c.Value, value) { + return + } + } + + t.Fatalf("could not find configuration key %s with value %v", key, value) +} diff --git a/internal/newtelemetry/telemetrytest/telemetrytest.go b/internal/newtelemetry/telemetrytest/telemetrytest.go deleted file mode 100644 index e003ebebc8..0000000000 --- a/internal/newtelemetry/telemetrytest/telemetrytest.go +++ /dev/null @@ -1,195 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2022 Datadog, Inc. - -// Package telemetrytest provides a mock implementation of the newtelemetry client for testing purposes -package telemetrytest - -import ( - "strings" - "sync" - - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" - - "github.com/stretchr/testify/mock" -) - -// MockClient implements Client and is used for testing purposes outside the newtelemetry package, -// e.g. the tracer and profiler. 
-type MockClient struct { - mock.Mock - mu sync.Mutex - Started bool - Stopped bool - Configuration []newtelemetry.Configuration - Logs map[newtelemetry.LogLevel]string - Integrations []string - Products map[newtelemetry.Namespace]bool - Metrics map[metricKey]*float64 -} - -type metricKey struct { - Namespace newtelemetry.Namespace - Name string - Tags string - Kind string -} - -func (m *MockClient) Close() error { - m.On("Close").Return() - return m.Called().Error(0) -} - -type MockMetricHandle struct { - mock.Mock - mu sync.Mutex - submit func(ptr *float64, value float64) - value *float64 -} - -func (m *MockMetricHandle) Submit(value float64) { - m.On("Submit", value).Return() - m.mu.Lock() - defer m.mu.Unlock() - m.submit(m.value, value) -} - -func (m *MockMetricHandle) Get() float64 { - m.On("Get").Return() - m.mu.Lock() - defer m.mu.Unlock() - return *m.value -} - -func (m *MockClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - m.mu.Lock() - defer m.mu.Unlock() - m.On("Count", namespace, name, tags).Return() - key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "count"} - if _, ok := m.Metrics[key]; !ok { - init := 0.0 - m.Metrics[key] = &init - } - - return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { - *ptr += value - }} -} - -func (m *MockClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - m.mu.Lock() - defer m.mu.Unlock() - m.On("Rate", namespace, name, tags).Return() - key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "rate"} - if _, ok := m.Metrics[key]; !ok { - init := 0.0 - m.Metrics[key] = &init - } - - return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { - *ptr += value - }} -} - -func (m *MockClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - m.mu.Lock() - defer m.mu.Unlock() - m.On("Gauge", namespace, name, tags).Return() - key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "gauge"} - if _, ok := m.Metrics[key]; !ok { - init := 0.0 - m.Metrics[key] = &init - } - - return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { - *ptr = value - }} -} - -func (m *MockClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - m.mu.Lock() - defer m.mu.Unlock() - m.On("Distribution", namespace, name, tags).Return() - key := metricKey{Namespace: namespace, Name: name, Tags: strings.Join(tags, ","), Kind: "distribution"} - if _, ok := m.Metrics[key]; !ok { - init := 0.0 - m.Metrics[key] = &init - } - - return &MockMetricHandle{value: m.Metrics[key], submit: func(ptr *float64, value float64) { - *ptr = value - }} -} - -func (m *MockClient) Log(level newtelemetry.LogLevel, text string, options ...newtelemetry.LogOption) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("Log", level, text, options).Return() - m.Logs[level] = text -} - -func (m *MockClient) ProductStarted(product newtelemetry.Namespace) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("ProductStarted", product).Return() - m.Products[product] = true -} - -func (m *MockClient) ProductStopped(product newtelemetry.Namespace) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("ProductStopped", product).Return() - m.Products[product] = false -} - -func (m *MockClient) ProductStartError(product 
newtelemetry.Namespace, err error) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("ProductStartError", product, err).Return() - m.Products[product] = false -} - -func (m *MockClient) RegisterAppConfig(key string, value any, origin newtelemetry.Origin) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("RegisterAppConfig", key, value, origin).Return() - m.Called(key, value, origin) - m.Configuration = append(m.Configuration, newtelemetry.Configuration{Name: key, Value: value, Origin: origin}) -} - -func (m *MockClient) RegisterAppConfigs(kvs ...newtelemetry.Configuration) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("RegisterAppConfigs", kvs).Return() - m.Called(kvs) - m.Configuration = append(m.Configuration, kvs...) -} - -func (m *MockClient) MarkIntegrationAsLoaded(integration newtelemetry.Integration) { - m.mu.Lock() - defer m.mu.Unlock() - m.On("MarkIntegrationAsLoaded", integration).Return() - m.Called(integration) - m.Integrations = append(m.Integrations, integration.Name) -} - -func (m *MockClient) Flush() { - m.On("Flush").Return() -} - -func (m *MockClient) AppStart() { - m.mu.Lock() - defer m.mu.Unlock() - m.On("AppStart").Return() - m.Started = true -} - -func (m *MockClient) AppStop() { - m.mu.Lock() - defer m.mu.Unlock() - m.On("AppStop").Return() - m.Stopped = true -} - -var _ newtelemetry.Client = (*MockClient)(nil) From 091c52241f22dcbe386f2cef773d013fd43d6486 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 31 Jan 2025 15:57:34 +0100 Subject: [PATCH 40/61] rework logger to work without sync.Map.Clear() from go 1.23 Signed-off-by: Eliott Bouhana --- internal/newtelemetry/logger.go | 45 +++++++++++---------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index 79e0eef41e..c675134005 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -64,18 +64,10 @@ type loggerValue struct { } type logger struct { - store atomic.Pointer[internal.TypedSyncMap[loggerKey, *loggerValue]] + store internal.TypedSyncMap[loggerKey, *loggerValue] } func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { - store := logger.store.Load() - if store == nil { - store = new(internal.TypedSyncMap[loggerKey, *loggerValue]) - for logger.store.CompareAndSwap(nil, store) { - continue - } - } - key := loggerKey{ message: text, level: level, @@ -85,32 +77,22 @@ func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { opt(&key, nil) } - val, ok := store.Load(key) - if ok { - val.count.Add(1) - return - } - - newVal := &loggerValue{ - time: time.Now().Unix(), - } - - for _, opt := range opts { - opt(&key, newVal) + value, loaded := logger.store.LoadOrStore(key, &loggerValue{}) + if !loaded { + // If we were the first to store the value, we need to set the time and apply the options + value.time = time.Now().Unix() + for _, opt := range opts { + opt(nil, value) + } } - newVal.count.Store(1) - store.Store(key, newVal) + value.count.Add(1) } func (logger *logger) Payload() transport.Payload { - store := logger.store.Swap(nil) - if store == nil { - return nil - } - var logs []transport.LogMessage - store.Range(func(key loggerKey, value *loggerValue) bool { + logger.store.Range(func(key loggerKey, value *loggerValue) bool { + logger.store.Delete(key) logs = append(logs, transport.LogMessage{ Message: key.message, Level: key.level, @@ -122,6 +104,9 @@ func (logger *logger) Payload() transport.Payload { return true }) - store.Clear() + if len(logs) == 0 { + 
return nil + } + return transport.Logs{Logs: logs} } From 8bc28bc47d6e984163c8303ae465cd89368b26d1 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 31 Jan 2025 16:33:03 +0100 Subject: [PATCH 41/61] remove data race from telemetry metrics hot pointer Signed-off-by: Eliott Bouhana --- internal/newtelemetry/globalclient.go | 10 +++++----- internal/newtelemetry/internal/writer_test.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 5dd16f01b2..e996a7d8b7 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -21,7 +21,7 @@ var ( globalClientRecorder = internal.NewRecorder[Client]() // metricsHandleHotPointers contains all the metricsHotPointer, used to replay actions done before the actual MetricHandle is set - metricsHandleHotPointers []metricsHotPointer + metricsHandleHotPointers []*metricsHotPointer metricsHandleHotPointersMu sync.Mutex ) @@ -62,8 +62,7 @@ func SwapClient(client Client) { metricsHandleHotPointersMu.Lock() defer metricsHandleHotPointersMu.Unlock() for i := range metricsHandleHotPointers { - hotPointer := &metricsHandleHotPointers[i] - hotPointer.swap(hotPointer.maker(client)) + metricsHandleHotPointers[i].swap(metricsHandleHotPointers[i].maker(client)) } } } @@ -236,8 +235,9 @@ func newMetricsHotPointer(maker func(client Client) MetricHandle) *metricsHotPoi metricsHandleHotPointersMu.Lock() defer metricsHandleHotPointersMu.Unlock() - metricsHandleHotPointers = append(metricsHandleHotPointers, metricsHotPointer{maker: maker}) - return &metricsHandleHotPointers[len(metricsHandleHotPointers)-1] + hotPtr := &metricsHotPointer{maker: maker} + metricsHandleHotPointers = append(metricsHandleHotPointers, hotPtr) + return hotPtr } var metricLogLossOnce sync.Once diff --git a/internal/newtelemetry/internal/writer_test.go b/internal/newtelemetry/internal/writer_test.go index 02a2e9ea19..bacf6accf7 100644 --- a/internal/newtelemetry/internal/writer_test.go +++ b/internal/newtelemetry/internal/writer_test.go @@ -204,7 +204,7 @@ func TestWriter_Flush_MultipleEndpoints(t *testing.T) { assert.Zero(t, results[0].PayloadByteSize) assert.Equal(t, http.StatusOK, results[1].StatusCode) - assert.InDelta(t, time.Duration(1), results[1].CallDuration, float64(time.Millisecond)) + assert.InDelta(t, time.Duration(1), results[1].CallDuration, float64(time.Second)) assert.NotZero(t, results[1].PayloadByteSize) assert.NoError(t, results[1].Error) From 681eae1a493fbedf42eea5f644ad0e7ea56a4b18 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 31 Jan 2025 17:23:26 +0100 Subject: [PATCH 42/61] flush app-started on StartApp() Signed-off-by: Eliott Bouhana --- internal/newtelemetry/globalclient.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index e996a7d8b7..97de0061dc 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -45,6 +45,10 @@ func StartApp(client Client) { globalClientRecorder.Replay(client) client.AppStart() + + go func() { + client.Flush() + }() } // SwapClient swaps the global client with the given client and Flush the old (*client). 
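A note on the hot-pointer race fixed in PATCH 41 above: newMetricsHotPointer used to return &metricsHandleHotPointers[len(...)-1], a pointer into the slice's backing array, while SwapClient kept mutating elements through that same slice as it grew. Because append reallocates the backing array once capacity is exceeded, such a pointer can silently go stale in addition to racing with concurrent appends — which is why the slice now stores individually allocated *metricsHotPointer values. A minimal, hypothetical illustration of the pitfall (standalone Go, not code from this series):

package main

import "fmt"

type handle struct{ value float64 }

func main() {
	handles := make([]handle, 0, 1)

	handles = append(handles, handle{})
	early := &handles[0] // pointer into the original backing array

	// Exceeding the capacity forces append to allocate a new array and copy.
	handles = append(handles, handle{})

	handles[0].value = 42 // mutate through the grown slice
	fmt.Println(early.value, handles[0].value) // prints "0 42": early is stale
}

Storing pointers, as PATCH 41 does, keeps every caller and the swap loop looking at the same heap object no matter how often the registry slice is reallocated; the remaining synchronization stays behind metricsHandleHotPointersMu.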
From cd8642895743a0972e16f3f21565f37cee334fe8 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 3 Feb 2025 11:01:01 +0100 Subject: [PATCH 43/61] add new benchmarks to benchmarking platform Signed-off-by: Eliott Bouhana --- .gitlab-ci.yml | 2 +- internal/newtelemetry/client_test.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c6d7819a1d..e473b8453e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,7 +9,7 @@ variables: INDEX_FILE: index.txt KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: dd-trace-go FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true" - BENCHMARK_TARGETS: "BenchmarkStartRequestSpan|BenchmarkHttpServeTrace|BenchmarkTracerAddSpans|BenchmarkStartSpan|BenchmarkSingleSpanRetention|BenchmarkOTelApiWithCustomTags|BenchmarkInjectW3C|BenchmarkExtractW3C|BenchmarkPartialFlushing|BenchmarkGraphQL|BenchmarkSampleWAFContext|BenchmarkCaptureStackTrace|BenchmarkSetTagString|BenchmarkSetTagStringPtr|BenchmarkSetTagMetric|BenchmarkSetTagStringer" + BENCHMARK_TARGETS: "BenchmarkStartRequestSpan|BenchmarkHttpServeTrace|BenchmarkTracerAddSpans|BenchmarkStartSpan|BenchmarkSingleSpanRetention|BenchmarkOTelApiWithCustomTags|BenchmarkInjectW3C|BenchmarkExtractW3C|BenchmarkPartialFlushing|BenchmarkGraphQL|BenchmarkSampleWAFContext|BenchmarkCaptureStackTrace|BenchmarkSetTagString|BenchmarkSetTagStringPtr|BenchmarkSetTagMetric|BenchmarkSetTagStringer|BenchmarkLogs|BenchmarkWorstCaseScenarioFloodLogging|BenchmarkMetrics|BenchmarkWorstCaseScenarioFloodMetrics" include: - ".gitlab/benchmarks.yml" diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 59fc13dfe8..6e30852979 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -1423,7 +1423,6 @@ func BenchmarkMetrics(b *testing.B) { } func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { - b.ReportAllocs() nbSameMetric := 10 nbDifferentMetrics := 100 nbGoroutines := 25 @@ -1449,6 +1448,7 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { } b.Run("get-handle", func(b *testing.B) { + b.ReportAllocs() c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) require.NoError(b, err) @@ -1478,6 +1478,7 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { }) b.Run("handle-reused", func(b *testing.B) { + b.ReportAllocs() c, err := NewClient("test-service", "test-env", "1.0.0", clientConfig) require.NoError(b, err) From 373e2fda340a4e4f714a4c9efa666e5938f1accc Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 3 Feb 2025 13:56:14 +0100 Subject: [PATCH 44/61] apply @RomainMuller suggestions Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 39 ++++++++++++++----- internal/newtelemetry/client_config.go | 14 +++---- .../internal/mapper/app_started.go | 6 +-- .../newtelemetry/internal/mapper/default.go | 8 ++-- internal/newtelemetry/internal/ticker.go | 4 +- .../newtelemetry/internal/transport/body.go | 1 + internal/newtelemetry/internal/writer.go | 2 +- 7 files changed, 48 insertions(+), 26 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index ca2abd6110..6594a0ce76 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -192,8 +192,22 @@ func (c *client) Config() ClientConfig { return c.clientConfig } +// Flush sends all the data sources before calling flush +// This function is called by the flushTicker so it should not panic, or it will crash the whole customer application. 
+// If a panic occurs, we stop the telemetry and log the error. func (c *client) Flush() { - var payloads []transport.Payload + defer func() { + r := recover() + if r == nil { + return + } + log.Warn("panic while flushing telemetry data, stopping telemetry: %v", r) + if gc, ok := GlobalClient().(*client); ok && gc == c { + SwapClient(nil) + } + }() + + payloads := make([]transport.Payload, 0, 8) for _, ds := range c.dataSources { if payload := ds.Payload(); payload != nil { payloads = append(payloads, payload) @@ -203,21 +217,28 @@ func (c *client) Flush() { _, _ = c.flush(payloads) } +func (c *client) transform(payloads []transport.Payload) []transport.Payload { + c.flushMapperMu.Lock() + defer c.flushMapperMu.Unlock() + payloads, c.flushMapper = c.flushMapper.Transform(payloads) + return payloads +} + // flush sends all the data sources to the writer by let them flow through the given transformer function. // The transformer function is used to transform the bodies before sending them to the writer. func (c *client) flush(payloads []transport.Payload) (int, error) { - // Transform the bodies - { - c.flushMapperMu.Lock() - payloads, c.flushMapper = c.flushMapper.Transform(payloads) - c.flushMapperMu.Unlock() - } + payloads = c.transform(payloads) if c.payloadQueue.IsEmpty() && len(payloads) == 0 { - c.flushTicker.DecreaseSpeed() return 0, nil } + if c.payloadQueue.IsEmpty() { + c.flushTicker.CanDecreaseSpeed() + } else if c.payloadQueue.IsFull() { + c.flushTicker.CanIncreaseSpeed() + } + // We enqueue the new payloads to preserve the order of the payloads c.payloadQueue.Enqueue(payloads...) payloads = c.payloadQueue.Flush() @@ -243,7 +264,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { if !speedIncreased && successfulCall.PayloadByteSize > c.clientConfig.EarlyFlushPayloadSize { // We increase the speed of the flushTicker to try to flush the remaining bodies faster as we are at risk of sending too large bodies to the backend - c.flushTicker.IncreaseSpeed() + c.flushTicker.CanIncreaseSpeed() speedIncreased = true } diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index a7dc4f0a0c..2dde65f215 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -65,16 +65,9 @@ type ClientConfig struct { FlushInterval Range[time.Duration] // PayloadQueueSize is the size of the payload queue. - // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. - // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. - // Ideally both values should be power of 2 because of the way the ring queue is implemented as it's growing PayloadQueueSize Range[int] // DistributionsSize is the size of the distribution queue. - // Default max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. - // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. - // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. - // Which will bring our max throughput to 1100 points per second or about 750µs per request. 
DistributionsSize Range[int] // Debug enables debug mode for the telemetry clientt and sent it to the backend so it logs the request @@ -114,12 +107,19 @@ var ( maxPayloadSize = 5 * 1024 * 1024 // 5MB // TODO: tweak this value once we get real telemetry data from the telemetry client + // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. + // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. + // Ideally both values should be power of 2 because of the way the ring queue is implemented as it's growing defaultPayloadQueueSize = Range[int]{ Min: 4, Max: 32, } // TODO: tweak this value once we get telemetry data from the telemetry client + // Default max size is a 2^14 array of float64 (2^3 bytes) which makes a distribution 128KB bytes array _at worse_. + // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. + // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. + // Which will bring our max throughput to 1100 points per second or about 750µs per request. distributionsSize = Range[int]{ Min: 1 << 8, Max: 1 << 14, diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go index 395a401196..5e10831aa7 100644 --- a/internal/newtelemetry/internal/mapper/app_started.go +++ b/internal/newtelemetry/internal/mapper/app_started.go @@ -32,11 +32,11 @@ func (t *appStartedReducer) Transform(payloads []transport.Payload) ([]transport payloadLefts := make([]transport.Payload, 0, len(payloads)) for _, payload := range payloads { - switch payload.(type) { + switch payload := payload.(type) { case transport.AppClientConfigurationChange: - appStarted.Configuration = payload.(transport.AppClientConfigurationChange).Configuration + appStarted.Configuration = payload.Configuration case transport.AppProductChange: - appStarted.Products = payload.(transport.AppProductChange).Products + appStarted.Products = payload.Products default: payloadLefts = append(payloadLefts, payload) } diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go index 5007469f70..c34fbd2154 100644 --- a/internal/newtelemetry/internal/mapper/default.go +++ b/internal/newtelemetry/internal/mapper/default.go @@ -71,17 +71,17 @@ func (t *heartbeatEnricher) Transform(payloads []transport.Payload) ([]transport // Composition described here: // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-extended-heartbeat for _, payload := range payloads { - switch p := payload.(type) { + switch payload := payload.(type) { case transport.AppStarted: // Should be sent only once anyway - t.extendedHeartbeat.Configuration = p.Configuration + t.extendedHeartbeat.Configuration = payload.Configuration case transport.AppDependenciesLoaded: if t.extendedHeartbeat.Dependencies == nil { - t.extendedHeartbeat.Dependencies = p.Dependencies + t.extendedHeartbeat.Dependencies = payload.Dependencies } case transport.AppIntegrationChange: // The number of integrations should be small enough so we can just append to the list - t.extendedHeartbeat.Integrations = append(t.extendedHeartbeat.Integrations, 
p.Integrations...) + t.extendedHeartbeat.Integrations = append(t.extendedHeartbeat.Integrations, payload.Integrations...) } } diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index 192673867e..6b5011c06e 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -44,7 +44,7 @@ func NewTicker(tickFunc TickFunc, minInterval, maxInterval time.Duration) *Ticke return ticker } -func (t *Ticker) IncreaseSpeed() { +func (t *Ticker) CanIncreaseSpeed() { t.tickSpeedMu.Lock() defer t.tickSpeedMu.Unlock() @@ -52,7 +52,7 @@ func (t *Ticker) IncreaseSpeed() { t.Reset(t.tickSpeed) } -func (t *Ticker) DecreaseSpeed() { +func (t *Ticker) CanDecreaseSpeed() { t.tickSpeedMu.Lock() defer t.tickSpeedMu.Unlock() diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index f23762fdc1..0fbae6f1fc 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -46,6 +46,7 @@ type Body struct { Host Host `json:"host"` } +// UnmarshalJSON is used to test the telemetry client end to end func (b *Body) UnmarshalJSON(bytes []byte) error { var anyMap map[string]json.RawMessage var err error diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 36a5842969..36722bb4fd 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -255,7 +255,7 @@ func (w *writer) Flush(payload transport.Payload) ([]EndpointRequestResult, erro defer response.Body.Close() if response.StatusCode >= 300 || response.StatusCode < 200 { - respBodyBytes, _ := io.ReadAll(response.Body) // maybe we can find an error reason in the response body + respBodyBytes, _ := io.ReadAll(io.LimitReader(response.Body, 256)) // maybe we can find an error reason in the response body results = append(results, EndpointRequestResult{Error: &WriterStatusCodeError{ Status: response.Status, Body: string(respBodyBytes), From 09cab87bcffbca7548224719044c5206166f6531 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 3 Feb 2025 16:01:10 +0100 Subject: [PATCH 45/61] fix bug found with the system-tests Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_test.go | 43 +++++++++++++------ .../newtelemetry/internal/mapper/default.go | 2 +- .../newtelemetry/internal/transport/body.go | 14 +++--- .../internal/transport/message_batch.go | 4 +- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 6e30852979..dc383542b8 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -117,11 +117,11 @@ func TestClientFlush(t *testing.T) { payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) - require.Len(t, batch.Payload, 2) - assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch.Payload[0].RequestType) - assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch.Payload[1].RequestType) + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch[1].RequestType) - assert.Len(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Configuration, 0) + assert.Len(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Configuration, 0) }, }, { 
@@ -137,12 +137,12 @@ func TestClientFlush(t *testing.T) { payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) - require.Len(t, batch.Payload, 2) - assert.Equal(t, transport.RequestTypeAppIntegrationsChange, batch.Payload[0].RequestType) - assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch.Payload[1].RequestType) - assert.Len(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations, 1) - assert.Equal(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Name, "test-integration") - assert.Equal(t, batch.Payload[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Version, "1.0.0") + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppIntegrationsChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppExtendedHeartBeat, batch[1].RequestType) + assert.Len(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations, 1) + assert.Equal(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Name, "test-integration") + assert.Equal(t, batch[1].Payload.(transport.AppExtendedHeartbeat).Integrations[0].Version, "1.0.0") }, }, { @@ -296,8 +296,8 @@ func TestClientFlush(t *testing.T) { payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) - assert.Len(t, batch.Payload, 2) - for _, payload := range batch.Payload { + assert.Len(t, batch, 2) + for _, payload := range batch { switch p := payload.Payload.(type) { case transport.AppProductChange: assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) @@ -328,8 +328,8 @@ func TestClientFlush(t *testing.T) { payload := payloads[0] require.IsType(t, transport.MessageBatch{}, payload) batch := payload.(transport.MessageBatch) - assert.Len(t, batch.Payload, 3) - for _, payload := range batch.Payload { + assert.Len(t, batch, 3) + for _, payload := range batch { switch p := payload.Payload.(type) { case transport.AppProductChange: assert.Equal(t, transport.RequestTypeAppProductChange, payload.RequestType) @@ -1097,6 +1097,21 @@ func TestClientEnd2End(t *testing.T) { assert.Equal(t, transport.RequestTypeAppClosing, bodies[1].RequestType) }, }, + { + name: "message-batch", + when: func(c *client) { + c.RegisterAppConfig("key", "value", OriginCode) + c.ProductStarted(NamespaceAppSec) + }, + expect: func(t *testing.T, bodies []transport.Body) { + require.Len(t, bodies, 1) + assert.Equal(t, transport.RequestTypeMessageBatch, bodies[0].RequestType) + batch := bodies[0].Payload.(transport.MessageBatch) + require.Len(t, batch, 2) + assert.Equal(t, transport.RequestTypeAppProductChange, batch[0].RequestType) + assert.Equal(t, transport.RequestTypeAppClientConfigurationChange, batch[1].RequestType) + }, + }, { name: "fail-agent-endpoint", when: func(c *client) { diff --git a/internal/newtelemetry/internal/mapper/default.go b/internal/newtelemetry/internal/mapper/default.go index c34fbd2154..12f75a5fb8 100644 --- a/internal/newtelemetry/internal/mapper/default.go +++ b/internal/newtelemetry/internal/mapper/default.go @@ -55,7 +55,7 @@ func (t *messageBatchReducer) Transform(payloads []transport.Payload) ([]transpo } } - return []transport.Payload{transport.MessageBatch{Payload: messages}}, t + return []transport.Payload{transport.MessageBatch(messages)}, t } type heartbeatEnricher struct { diff --git a/internal/newtelemetry/internal/transport/body.go b/internal/newtelemetry/internal/transport/body.go index 
0fbae6f1fc..2c4a0dd956 100644 --- a/internal/newtelemetry/internal/transport/body.go +++ b/internal/newtelemetry/internal/transport/body.go @@ -89,26 +89,24 @@ func (b *Body) UnmarshalJSON(bytes []byte) error { } if b.RequestType == RequestTypeMessageBatch { - var messageBatch struct { - Payload []struct { - RequestType RequestType `json:"request_type"` - Payload json.RawMessage `json:"payload"` - } `json:"payload"` + var messageBatch []struct { + RequestType RequestType `json:"request_type"` + Payload json.RawMessage `json:"payload"` } if err = json.Unmarshal(anyMap["payload"], &messageBatch); err != nil { return err } - batch := make([]Message, len(messageBatch.Payload)) - for i, message := range messageBatch.Payload { + batch := make([]Message, len(messageBatch)) + for i, message := range messageBatch { payload, err := unmarshalPayload(message.Payload, message.RequestType) if err != nil { return err } batch[i] = Message{RequestType: message.RequestType, Payload: payload} } - b.Payload = MessageBatch{Payload: batch} + b.Payload = MessageBatch(batch) return nil } diff --git a/internal/newtelemetry/internal/transport/message_batch.go b/internal/newtelemetry/internal/transport/message_batch.go index 00369c628b..d1f9f964fe 100644 --- a/internal/newtelemetry/internal/transport/message_batch.go +++ b/internal/newtelemetry/internal/transport/message_batch.go @@ -5,9 +5,7 @@ package transport -type MessageBatch struct { - Payload []Message `json:"payload"` -} +type MessageBatch []Message func (MessageBatch) RequestType() RequestType { return RequestTypeMessageBatch From 352d7a5b7d649fb592eb51a52ab31243c2408295 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Tue, 4 Feb 2025 14:20:55 +0100 Subject: [PATCH 46/61] increase retro compat with all sanitization config value code Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_test.go | 2 +- internal/newtelemetry/configuration.go | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index dc383542b8..e0e5f20740 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -205,7 +205,7 @@ func TestClientFlush(t *testing.T) { assert.Equal(t, "key:value,key2:value2", config.Configuration[1].Value) assert.Equal(t, OriginCode, config.Configuration[1].Origin) assert.Equal(t, "key3", config.Configuration[2].Name) - assert.Equal(t, "1,2,3", config.Configuration[2].Value) + assert.Equal(t, "[1 2 3]", config.Configuration[2].Value) assert.Equal(t, OriginDDConfig, config.Configuration[2].Origin) assert.Equal(t, "key4", config.Configuration[3].Name) assert.Equal(t, "{1}", config.Configuration[3].Value) diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index 3cb531744f..c51c0100bb 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -81,9 +81,19 @@ func SanitizeConfigValue(value any) any { case float64: // https://github.com/golang/go/issues/59627 if math.IsNaN(val) || math.IsInf(val, 0) { - return "" + return nil } return val + case []string: + return strings.Join(val, ",") // Retro compatibility with old code + } + + if _, ok := value.(json.Marshaler); ok { + return value + } + + if v, ok := value.(fmt.Stringer); ok { + return v.String() } valueOf := reflect.ValueOf(value) @@ -100,12 +110,14 @@ func SanitizeConfigValue(value any) any { switch { case valueOf.Kind() == reflect.Slice, valueOf.Kind() == 
reflect.Array: var sb strings.Builder + sb.WriteString("[") for i := 0; i < valueOf.Len(); i++ { if i > 0 { - sb.WriteString(",") + sb.WriteString(" ") } - sb.WriteString(fmt.Sprintf("%v", valueOf.Index(i))) + sb.WriteString(fmt.Sprintf("%v", valueOf.Index(i).Interface())) } + sb.WriteString("]") return sb.String() case valueOf.Kind() == reflect.Map: kvPair := make([]struct { @@ -139,9 +151,5 @@ func SanitizeConfigValue(value any) any { return sb.String() } - if _, ok := value.(json.Marshaler); ok { - return value - } - return fmt.Sprintf("%v", value) } From e849e5ea907a41de03864f69a859d357d83afdf8 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 5 Feb 2025 16:02:36 +0100 Subject: [PATCH 47/61] starting to apply @RomainMuller suggestions Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 8 - internal/newtelemetry/client_test.go | 11 +- internal/newtelemetry/distributions.go | 36 ++--- internal/newtelemetry/globalclient.go | 139 +++++++----------- .../knownmetrics/generator/generator.go | 6 +- .../internal/knownmetrics/known_metrics.go | 12 +- internal/newtelemetry/internal/recorder.go | 6 + .../internal/transport/generate-metrics.go | 1 + internal/newtelemetry/metrichandle.go | 70 +++++++++ internal/newtelemetry/metrics.go | 69 +++++++-- .../telemetrytest/globalclient_test.go | 130 ++++++++++++++++ internal/newtelemetry/telemetrytest/record.go | 9 +- 12 files changed, 348 insertions(+), 149 deletions(-) create mode 100644 internal/newtelemetry/metrichandle.go create mode 100644 internal/newtelemetry/telemetrytest/globalclient_test.go diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 6594a0ce76..2b703cce75 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -130,14 +130,6 @@ func (c *client) MarkIntegrationAsLoaded(integration Integration) { c.integrations.Add(integration) } -type noopMetricHandle struct{} - -func (noopMetricHandle) Submit(_ float64) {} - -func (noopMetricHandle) Get() float64 { - return 0 -} - func (c *client) Count(namespace Namespace, name string, tags []string) MetricHandle { if !c.clientConfig.MetricsEnabled { return noopMetricHandle{} diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index e0e5f20740..2d8ce19996 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -87,12 +87,15 @@ func TestClientFlush(t *testing.T) { Env: "test-env", Version: "1.0.0", } - for _, test := range []struct { + + type testParams struct { name string clientConfig ClientConfig when func(c *client) expect func(*testing.T, []transport.Payload) - }{ + } + + testcases := []testParams{ { name: "heartbeat", clientConfig: ClientConfig{ @@ -948,7 +951,9 @@ func TestClientFlush(t *testing.T) { assert.NotContains(t, distributions.Series[0].Points, 0.0) }, }, - } { + } + + for _, test := range testcases { t.Run(test.name, func(t *testing.T) { t.Parallel() config := defaultConfig(test.clientConfig) diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 4397229142..1c735434aa 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -6,7 +6,6 @@ package newtelemetry import ( - "strings" "sync" "sync/atomic" @@ -16,35 +15,29 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) -type distributionKey struct { - namespace Namespace - name string - tags string -} - type distributions struct { - store 
internal.TypedSyncMap[distributionKey, *distribution] + store internal.TypedSyncMap[metricKey, *distribution] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics queueSize Range[int] } // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []string) MetricHandle { - if !knownmetrics.IsKnownMetric(namespace, "distribution", name) { - log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ - "The metric will still be sent.", name) + kind := transport.DistMetric + key := newMetricKey(namespace, kind, name, tags) + handle, loaded := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](d.queueSize.Min, d.queueSize.Max)}) + if !loaded { // The metric is new: validate and log issues about it + if err := validateMetricKey(namespace, kind, name, tags); err != nil { + log.Warn("telemetry: %v", err) + } } - key := distributionKey{namespace: namespace, name: name, tags: strings.Join(tags, ",")} - - handle, _ := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](d.queueSize.Min, d.queueSize.Max)}) - return handle } func (d *distributions) Payload() transport.Payload { series := make([]transport.DistributionSeries, 0, d.store.Len()) - d.store.Range(func(_ distributionKey, handle *distribution) bool { + d.store.Range(func(_ metricKey, handle *distribution) bool { if payload := handle.payload(); payload.Metric != "" { series = append(series, payload) } @@ -59,7 +52,7 @@ func (d *distributions) Payload() transport.Payload { } type distribution struct { - key distributionKey + key metricKey newSubmit atomic.Bool values *internal.RingQueue[float64] @@ -85,16 +78,11 @@ func (d *distribution) payload() transport.DistributionSeries { return transport.DistributionSeries{} } - var tags []string - if d.key.tags != "" { - tags = strings.Split(d.key.tags, ",") - } - data := transport.DistributionSeries{ Metric: d.key.name, Namespace: d.key.namespace, - Tags: tags, - Common: knownmetrics.IsCommonMetric(d.key.namespace, "distribution", d.key.name), + Tags: d.key.SplitTags(), + Common: knownmetrics.IsCommonMetric(d.key.namespace, d.key.kind, d.key.name), Points: d.values.Flush(), } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 97de0061dc..6cb4b678bd 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -12,6 +12,7 @@ import ( globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) var ( @@ -20,9 +21,8 @@ var ( // globalClientRecorder contains all actions done on the global client done before StartApp() with an actual client object is called globalClientRecorder = internal.NewRecorder[Client]() - // metricsHandleHotPointers contains all the metricsHotPointer, used to replay actions done before the actual MetricHandle is set - metricsHandleHotPointers []*metricsHotPointer - metricsHandleHotPointersMu sync.Mutex + // metricsHandleSwappablePointers contains all the swappableMetricHandle, used to replay actions done before the actual MetricHandle is set + metricsHandleSwappablePointers 
internal.TypedSyncMap[metricKey, *swappableMetricHandle] ) // GlobalClient returns the global telemetry client. @@ -34,7 +34,8 @@ func GlobalClient() Client { return *client } -// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client). +// StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client) +// then calls client.Flush on the client asynchronously. func StartApp(client Client) { if Disabled() || GlobalClient() != nil { return @@ -63,11 +64,26 @@ func SwapClient(client Client) { if client != nil { // Swap all metrics hot pointers to the new MetricHandle - metricsHandleHotPointersMu.Lock() - defer metricsHandleHotPointersMu.Unlock() - for i := range metricsHandleHotPointers { - metricsHandleHotPointers[i].swap(metricsHandleHotPointers[i].maker(client)) - } + metricsHandleSwappablePointers.Range(func(_ metricKey, value *swappableMetricHandle) bool { + value.swap(value.maker(client)) + return true + }) + } +} + +// MockClient swaps the global client with the given client and clears the recorder to make sure external calls are not replayed. +// It returns a function that can be used to swap back the global client +func MockClient(client Client) func() { + globalClientRecorder.Clear() + metricsHandleSwappablePointers.Range(func(key metricKey, _ *swappableMetricHandle) bool { + metricsHandleSwappablePointers.Delete(key) + return true + }) + + oldClient := GlobalClient() + SwapClient(client) + return func() { + SwapClient(oldClient) } } @@ -96,55 +112,28 @@ func Disabled() bool { // Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Count(namespace Namespace, name string, tags []string) MetricHandle { - return globalClientNewMetric(func(client Client) MetricHandle { - return client.Count(namespace, name, tags) - }) + return globalClientNewMetric(namespace, transport.CountMetric, name, tags) } // Rate creates a new metric handle for the given parameters that can be used to submit values. // Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Rate(namespace Namespace, name string, tags []string) MetricHandle { - return globalClientNewMetric(func(client Client) MetricHandle { - return client.Rate(namespace, name, tags) - }) + return globalClientNewMetric(namespace, transport.RateMetric, name, tags) } // Gauge creates a new metric handle for the given parameters that can be used to submit values. // Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. func Gauge(namespace Namespace, name string, tags []string) MetricHandle { - return globalClientNewMetric(func(client Client) MetricHandle { - return client.Gauge(namespace, name, tags) - }) + return globalClientNewMetric(namespace, transport.GaugeMetric, name, tags) } // Distribution creates a new metric handle for the given parameters that can be used to submit values. // Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. // The MetricHandle is then swapped with the actual MetricHandle once the client is started. 
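The package-level helpers above (Count, Rate, Gauge, and the Distribution function defined just below) are safe to call before StartApp: the swappable handle records submissions and replays them once a real client is swapped in. A minimal usage sketch as it could look from code inside dd-trace-go; the metric name and tag are illustrative only:

package main

import (
	telemetry "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry"
)

func main() {
	// Safe before StartApp: the swappable handle buffers this submission.
	handle := telemetry.Count(telemetry.NamespaceTracers, "spans_created", []string{"integration:net/http"})
	handle.Submit(1)

	// Once a client is installed elsewhere via telemetry.StartApp(client),
	// the recorded Submit calls are replayed against the real handle.
}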
func Distribution(namespace Namespace, name string, tags []string) MetricHandle { - return globalClientNewMetric(func(client Client) MetricHandle { - return client.Distribution(namespace, name, tags) - }) -} - -func globalClientNewMetric(maker func(client Client) MetricHandle) MetricHandle { - if Disabled() { - // Act as a noop if telemetry is disabled - return &metricsHotPointer{} - } - - client := globalClient.Load() - wrapper := newMetricsHotPointer(maker) - if client == nil || *client == nil { - wrapper.recorder = internal.NewRecorder[MetricHandle]() - } - - globalClientCall(func(client Client) { - wrapper.swap(maker(client)) - }) - - return wrapper + return globalClientNewMetric(namespace, transport.DistMetric, name, tags) } func Log(level LogLevel, text string, options ...LogOption) { @@ -235,57 +224,35 @@ func globalClientCall(fun func(client Client)) { fun(*client) } -func newMetricsHotPointer(maker func(client Client) MetricHandle) *metricsHotPointer { - metricsHandleHotPointersMu.Lock() - defer metricsHandleHotPointersMu.Unlock() - - hotPtr := &metricsHotPointer{maker: maker} - metricsHandleHotPointers = append(metricsHandleHotPointers, hotPtr) - return hotPtr -} - -var metricLogLossOnce sync.Once - -// metricsHotPointer is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. -type metricsHotPointer struct { - ptr atomic.Pointer[MetricHandle] - recorder internal.Recorder[MetricHandle] - maker func(client Client) MetricHandle -} - -func (t *metricsHotPointer) Submit(value float64) { +func globalClientNewMetric(namespace Namespace, kind transport.MetricType, name string, tags []string) MetricHandle { if Disabled() { - return + return noopMetricHandle{} } - inner := t.ptr.Load() - if inner == nil || *inner == nil { - if !t.recorder.Record(func(handle MetricHandle) { - handle.Submit(value) - }) { - metricLogLossOnce.Do(func() { - log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss") - }) + maker := func(client Client) MetricHandle { + switch kind { + case transport.CountMetric: + return client.Count(namespace, name, tags) + case transport.RateMetric: + return client.Rate(namespace, name, tags) + case transport.GaugeMetric: + return client.Gauge(namespace, name, tags) + case transport.DistMetric: + return client.Distribution(namespace, name, tags) } - return + log.Warn("telemetry: unknown metric type %q", kind) + return nil } - - (*inner).Submit(value) -} - -func (t *metricsHotPointer) Get() float64 { - inner := t.ptr.Load() - if inner == nil || *inner == nil { - return 0 + key := newMetricKey(namespace, kind, name, tags) + wrapper := &swappableMetricHandle{maker: maker} + if client := globalClient.Load(); client == nil || *client == nil { + wrapper.recorder = internal.NewRecorder[MetricHandle]() } - - return (*inner).Get() -} - -func (t *metricsHotPointer) swap(handle MetricHandle) { - if t.ptr.Swap(&handle) == nil { - t.recorder.Replay(handle) + hotPtr, loaded := metricsHandleSwappablePointers.LoadOrStore(key, wrapper) + if !loaded { + globalClientCall(func(client Client) { + hotPtr.swap(maker(client)) + }) } + return hotPtr } - -var _ MetricHandle = (*metricsHotPointer)(nil) diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index e0283cca0b..a343a52a31 100644 --- 
a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -73,7 +73,7 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames return strings.Compare(string(i.Namespace), string(j.Namespace)) } if i.Type != j.Type { - return strings.Compare(i.Type, j.Type) + return strings.Compare(string(i.Type), string(j.Type)) } return strings.Compare(i.Name, j.Name) }) @@ -102,7 +102,7 @@ func getCommonMetricNames(input map[string]any) []knownmetrics.Declaration { metric := knownmetrics.Declaration{ Namespace: transport.Namespace(category), Name: metricKey, - Type: value.(map[string]any)["metric_type"].(string), + Type: transport.MetricType(value.(map[string]any)["metric_type"].(string)), } names = append(names, metric) if aliases, ok := value.(map[string]any)["aliases"]; ok { @@ -124,7 +124,7 @@ func getGoMetricNames(input map[string]any) []knownmetrics.Declaration { } names = append(names, knownmetrics.Declaration{ Name: key, - Type: value.(map[string]any)["metric_type"].(string), + Type: transport.MetricType(value.(map[string]any)["metric_type"].(string)), }) } return names diff --git a/internal/newtelemetry/internal/knownmetrics/known_metrics.go b/internal/newtelemetry/internal/knownmetrics/known_metrics.go index 14a9991752..4fb1c6b079 100644 --- a/internal/newtelemetry/internal/knownmetrics/known_metrics.go +++ b/internal/newtelemetry/internal/knownmetrics/known_metrics.go @@ -23,9 +23,9 @@ var commonMetricsJSON []byte var golangMetricsJSON []byte type Declaration struct { - Namespace transport.Namespace `json:"namespace"` - Type string `json:"type"` - Name string `json:"name"` + Namespace transport.Namespace `json:"namespace"` + Type transport.MetricType `json:"type"` + Name string `json:"name"` } var ( @@ -43,21 +43,21 @@ func parseMetricNames(bytes []byte) []Declaration { // IsKnownMetric returns true if the given metric name is a known metric by the backend // This is linked to generated common_metrics.json file and golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsKnownMetric(namespace transport.Namespace, typ, name string) bool { +func IsKnownMetric(namespace transport.Namespace, typ transport.MetricType, name string) bool { decl := Declaration{Namespace: namespace, Type: typ, Name: name} return slices.Contains(commonMetrics, decl) || slices.Contains(golangMetrics, decl) } // IsCommonMetric returns true if the given metric name is a known common (cross-language) metric by the backend // This is linked to the generated common_metrics.json file. If you added new metrics to the backend, you should rerun the generator. -func IsCommonMetric(namespace transport.Namespace, typ, name string) bool { +func IsCommonMetric(namespace transport.Namespace, typ transport.MetricType, name string) bool { decl := Declaration{Namespace: namespace, Type: typ, Name: name} return slices.Contains(commonMetrics, decl) } // IsLanguageMetric returns true if the given metric name is a known Go language metric by the backend // This is linked to the generated golang_metrics.json file. If you added new metrics to the backend, you should rerun the generator. 
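Typing Declaration.Type as transport.MetricType instead of a bare string means a mismatched kind can no longer slip through as a silent lookup miss, for IsKnownMetric and IsCommonMetric above as well as the IsLanguageMetric change that follows. The lookup itself is exact struct equality via slices.Contains, as in this self-contained sketch (the MetricType and Declaration types below are simplified stand-ins for the real transport and knownmetrics declarations):

package main

import (
	"fmt"
	"slices"
)

type MetricType string

const CountMetric MetricType = "count"

// Declaration mirrors the shape of knownmetrics.Declaration for illustration.
type Declaration struct {
	Namespace string
	Type      MetricType
	Name      string
}

func main() {
	known := []Declaration{{Namespace: "tracers", Type: CountMetric, Name: "spans_created"}}
	probe := Declaration{Namespace: "tracers", Type: CountMetric, Name: "spans_created"}
	// Exact struct equality: namespace, kind and name must all match.
	fmt.Println(slices.Contains(known, probe)) // true
}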
-func IsLanguageMetric(typ, name string) bool { +func IsLanguageMetric(typ transport.MetricType, name string) bool { decl := Declaration{Type: typ, Name: name} return slices.Contains(golangMetrics, decl) } diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index f7b1aa8793..672bac1ecd 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -28,6 +28,7 @@ func (r Recorder[T]) Record(f func(T)) bool { return r.queue.Enqueue(f) } +// Replay uses T as an argument to replay all recorded functions in order of recording. func (r Recorder[T]) Replay(t T) { if r.queue == nil { return @@ -40,3 +41,8 @@ func (r Recorder[T]) Replay(t T) { f(t) } } + +// Clear clears the Recorder's queue. +func (r Recorder[T]) Clear() { + r.queue.ReleaseBuffer(r.queue.GetBuffer()) +} diff --git a/internal/newtelemetry/internal/transport/generate-metrics.go b/internal/newtelemetry/internal/transport/generate-metrics.go index 1c0686a8be..220f93a920 100644 --- a/internal/newtelemetry/internal/transport/generate-metrics.go +++ b/internal/newtelemetry/internal/transport/generate-metrics.go @@ -27,6 +27,7 @@ const ( RateMetric MetricType = "rate" CountMetric MetricType = "count" GaugeMetric MetricType = "gauge" + DistMetric MetricType = "distribution" ) // MetricData is a sequence of observations for a single named metric. diff --git a/internal/newtelemetry/metrichandle.go b/internal/newtelemetry/metrichandle.go new file mode 100644 index 0000000000..a191c953f2 --- /dev/null +++ b/internal/newtelemetry/metrichandle.go @@ -0,0 +1,70 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package newtelemetry + +import ( + "math" + "sync" + "sync/atomic" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" +) + +// noopMetricHandle is a no-op implementation of a metric handle. +type noopMetricHandle struct{} + +func (noopMetricHandle) Submit(_ float64) {} + +func (noopMetricHandle) Get() float64 { + return math.NaN() +} + +var metricLogLossOnce sync.Once + +// swappableMetricHandle is a MetricHandle that holds a pointer to another MetricHandle and a recorder to replay actions done before the actual MetricHandle is set. 
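The record-then-replay mechanism of the Recorder above is also what backs the swappableMetricHandle type defined right below: calls made before a real handle exists are queued as closures and replayed in order once one is swapped in. A self-contained sketch of that pattern, using a simplified stand-in for internal.Recorder:

package main

import "fmt"

// recorder is a simplified stand-in for internal.Recorder[T]: it queues
// closures until a real target exists, then replays them in order.
type recorder[T any] struct{ queue []func(T) }

func (r *recorder[T]) Record(f func(T)) { r.queue = append(r.queue, f) }

func (r *recorder[T]) Replay(t T) {
	for _, f := range r.queue {
		f(t)
	}
	r.queue = nil
}

func main() {
	var r recorder[*float64]
	// Recorded before the "handle" exists, e.g. a Submit done before StartApp.
	r.Record(func(v *float64) { *v += 1 })
	r.Record(func(v *float64) { *v += 2 })

	var value float64
	r.Replay(&value) // replayed once the real target is available
	fmt.Println(value) // 3
}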
+type swappableMetricHandle struct { + ptr atomic.Pointer[MetricHandle] + recorder internal.Recorder[MetricHandle] + maker func(client Client) MetricHandle +} + +func (t *swappableMetricHandle) Submit(value float64) { + if Disabled() { + return + } + + inner := t.ptr.Load() + if inner == nil || *inner == nil { + if !t.recorder.Record(func(handle MetricHandle) { + handle.Submit(value) + }) { + metricLogLossOnce.Do(func() { + log.Debug("telemetry: metric is losing values because the telemetry client has not been started yet, dropping telemetry data, please start the telemetry client earlier to avoid data loss") + }) + } + return + } + + (*inner).Submit(value) +} + +func (t *swappableMetricHandle) Get() float64 { + inner := t.ptr.Load() + if inner == nil || *inner == nil { + return 0 + } + + return (*inner).Get() +} + +func (t *swappableMetricHandle) swap(handle MetricHandle) { + if t.ptr.Swap(&handle) == nil { + t.recorder.Replay(handle) + } +} + +var _ MetricHandle = (*swappableMetricHandle)(nil) diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 37d453cd7f..cfdf43745b 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -6,7 +6,9 @@ package newtelemetry import ( + "fmt" "math" + "sort" "strings" "sync/atomic" "time" @@ -25,6 +27,42 @@ type metricKey struct { tags string } +func (k metricKey) SplitTags() []string { + if k.tags == "" { + return nil + } + return strings.Split(k.tags, ",") +} + +func validateMetricKey(namespace Namespace, kind transport.MetricType, name string, tags []string) error { + if len(name) == 0 { + return fmt.Errorf("telemetry: metric name with tags %v should be empty", tags) + } + + if !knownmetrics.IsKnownMetric(namespace, kind, name) { + return fmt.Errorf("telemetry: metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ + "The metric will still be sent", name, string(kind), namespace) + } + + for _, tag := range tags { + if len(tag) == 0 { + return fmt.Errorf("telemetry: metric %q has should not have empty tags", name) + } + + if strings.Contains(tag, ",") { + return fmt.Errorf("telemetry: metric %q tag %q should not contain commas", name, tag) + } + } + + return nil +} + +// newMetricKey returns a new metricKey with the given parameters with the tags sorted and joined by commas. +func newMetricKey(namespace Namespace, kind transport.MetricType, name string, tags []string) metricKey { + sort.Strings(tags) + return metricKey{namespace: namespace, kind: kind, name: name, tags: strings.Join(tags, ",")} +} + // metricsHandle is the internal equivalent of MetricHandle for Count/Rate/Gauge metrics that are sent via the payload [transport.GenerateMetrics]. type metricHandle interface { MetricHandle @@ -38,22 +76,28 @@ type metrics struct { // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, name string, tags []string) MetricHandle { - if !knownmetrics.IsKnownMetric(namespace, string(kind), name) { - log.Debug("telemetry: metric name %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. 
"+ - "The metric will still be sent.", name) - } var ( - key = metricKey{namespace: namespace, kind: kind, name: name, tags: strings.Join(tags, ",")} + key = newMetricKey(namespace, kind, name, tags) handle MetricHandle + loaded bool ) switch kind { case transport.CountMetric: - handle, _ = m.store.LoadOrStore(key, &count{metric: metric{key: key}}) + handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key}}) case transport.GaugeMetric: - handle, _ = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) + handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) case transport.RateMetric: - handle, _ = m.store.LoadOrStore(key, &rate{metric: metric{key: key}, intervalStart: time.Now()}) + handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}, intervalStart: time.Now()}) + default: + log.Warn("telemetry: unknown metric type %q", kind) + return nil + } + + if !loaded { // The metric is new: validate and log issues about it + if err := validateMetricKey(namespace, kind, name, tags); err != nil { + log.Warn("telemetry: %v", err) + } } return handle @@ -93,17 +137,12 @@ func (m *metric) payload() transport.MetricData { return transport.MetricData{} } - var tags []string - if m.key.tags != "" { - tags = strings.Split(m.key.tags, ",") - } - data := transport.MetricData{ Metric: m.key.name, Namespace: m.key.namespace, - Tags: tags, + Tags: m.key.SplitTags(), Type: m.key.kind, - Common: knownmetrics.IsCommonMetric(m.key.namespace, string(m.key.kind), m.key.name), + Common: knownmetrics.IsCommonMetric(m.key.namespace, m.key.kind, m.key.name), Points: [][2]any{ {m.submitTime.Load(), math.Float64frombits(m.value.Load())}, }, diff --git a/internal/newtelemetry/telemetrytest/globalclient_test.go b/internal/newtelemetry/telemetrytest/globalclient_test.go new file mode 100644 index 0000000000..1f6b86983b --- /dev/null +++ b/internal/newtelemetry/telemetrytest/globalclient_test.go @@ -0,0 +1,130 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. 
+ +package telemetrytest + +import ( + "math" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" +) + +func TestGlobalClient(t *testing.T) { + t.Run("config", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.RegisterAppConfig("key", "value", newtelemetry.OriginCode) + assert.Len(t, recorder.Configuration, 1) + assert.Equal(t, "key", recorder.Configuration[0].Name) + assert.Equal(t, "value", recorder.Configuration[0].Value) + assert.Equal(t, newtelemetry.OriginCode, recorder.Configuration[0].Origin) + }) + + t.Run("configs", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.RegisterAppConfigs(newtelemetry.Configuration{Name: "key", Value: "value", Origin: newtelemetry.OriginCode}, newtelemetry.Configuration{Name: "key2", Value: "value2", Origin: newtelemetry.OriginRemoteConfig}) + assert.Len(t, recorder.Configuration, 2) + assert.Equal(t, "key", recorder.Configuration[0].Name) + assert.Equal(t, "value", recorder.Configuration[0].Value) + assert.Equal(t, newtelemetry.OriginCode, recorder.Configuration[0].Origin) + assert.Equal(t, "key2", recorder.Configuration[1].Name) + assert.Equal(t, "value2", recorder.Configuration[1].Value) + assert.Equal(t, newtelemetry.OriginRemoteConfig, recorder.Configuration[1].Origin) + }) + + t.Run("app-stop", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.StopApp() + assert.True(t, recorder.Stopped) + }) + + t.Run("product-start", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.ProductStarted(newtelemetry.NamespaceAppSec) + assert.Len(t, recorder.Products, 1) + assert.True(t, recorder.Products[newtelemetry.NamespaceAppSec]) + }) + + t.Run("product-stopped", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.ProductStopped(newtelemetry.NamespaceAppSec) + assert.Len(t, recorder.Products, 1) + assert.False(t, recorder.Products[newtelemetry.NamespaceAppSec]) + }) + + t.Run("integration-loaded", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.LoadIntegration("test-integration") + assert.Len(t, recorder.Integrations, 1) + assert.Equal(t, "test-integration", recorder.Integrations[0].Name) + }) + + t.Run("mark-integration-as-loaded", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.MarkIntegrationAsLoaded(newtelemetry.Integration{Name: "test-integration", Version: "1.0.0"}) + assert.Len(t, recorder.Integrations, 1) + assert.Equal(t, "test-integration", recorder.Integrations[0].Name) + assert.Equal(t, "1.0.0", recorder.Integrations[0].Version) + }) + + t.Run("count", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Count(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.CountMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: 
string(transport.CountMetric)}].Get()) + }) + + t.Run("gauge", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Gauge(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.GaugeMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.GaugeMetric)}].Get()) + }) + + t.Run("rate", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Rate(newtelemetry.NamespaceTracers, "init_time", nil).Submit(1) + + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.RateMetric)}) + assert.False(t, math.IsNaN(recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceTracers, Kind: string(transport.RateMetric)}].Get())) + }) + + t.Run("distribution", func(t *testing.T) { + recorder := new(RecordClient) + defer newtelemetry.MockClient(recorder)() + + newtelemetry.Distribution(newtelemetry.NamespaceGeneral, "init_time", nil).Submit(1) + assert.Len(t, recorder.Metrics, 1) + require.Contains(t, recorder.Metrics, MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceGeneral, Kind: string(transport.DistMetric)}) + assert.Equal(t, 1.0, recorder.Metrics[MetricKey{Name: "init_time", Namespace: newtelemetry.NamespaceGeneral, Kind: string(transport.DistMetric)}].Get()) + }) +} diff --git a/internal/newtelemetry/telemetrytest/record.go b/internal/newtelemetry/telemetrytest/record.go index e3b751e47a..b73d398f54 100644 --- a/internal/newtelemetry/telemetrytest/record.go +++ b/internal/newtelemetry/telemetrytest/record.go @@ -13,6 +13,7 @@ import ( "time" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" + "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) type MetricKey struct { @@ -76,7 +77,7 @@ func (r *RecordClient) metric(kind string, namespace newtelemetry.Namespace, nam } func (r *RecordClient) Count(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - return r.metric("count", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + return r.metric(string(transport.CountMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { handle.count += value }, func(handle *RecordMetricHandle) float64 { return handle.count @@ -84,7 +85,7 @@ func (r *RecordClient) Count(namespace newtelemetry.Namespace, name string, tags } func (r *RecordClient) Rate(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - handle := r.metric("rate", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + handle := r.metric(string(transport.RateMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { handle.count += value handle.rate = float64(handle.count) / time.Since(handle.rateStart).Seconds() }, func(handle *RecordMetricHandle) float64 { @@ -96,7 +97,7 @@ func (r *RecordClient) Rate(namespace newtelemetry.Namespace, name string, tags } func (r *RecordClient) Gauge(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - return r.metric("gauge", namespace, name, tags, func(handle 
*RecordMetricHandle, value float64) { + return r.metric(string(transport.GaugeMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { handle.gauge = value }, func(handle *RecordMetricHandle) float64 { return handle.gauge @@ -104,7 +105,7 @@ func (r *RecordClient) Gauge(namespace newtelemetry.Namespace, name string, tags } func (r *RecordClient) Distribution(namespace newtelemetry.Namespace, name string, tags []string) newtelemetry.MetricHandle { - return r.metric("distribution", namespace, name, tags, func(handle *RecordMetricHandle, value float64) { + return r.metric(string(transport.DistMetric), namespace, name, tags, func(handle *RecordMetricHandle, value float64) { handle.distrib = append(handle.distrib, value) }, func(handle *RecordMetricHandle) float64 { var sum float64 From 4798807e9f960fe314d59543ba9b0b64e9266ad3 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Wed, 5 Feb 2025 18:05:47 +0100 Subject: [PATCH 48/61] rework metrics Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 11 ++- internal/newtelemetry/client_test.go | 5 +- internal/newtelemetry/distributions.go | 10 +- internal/newtelemetry/logger.go | 2 - internal/newtelemetry/metrics.go | 128 +++++++++++++++++-------- 5 files changed, 101 insertions(+), 55 deletions(-) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index 2ea5677334..fb24b1db09 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -78,25 +78,28 @@ type Configuration struct { Origin Origin } +// LogOption is a function that modifies the log message that is sent to the telemetry. +type LogOption func(key *loggerKey, value *loggerValue) + // Client constitutes all the functions available concurrently for the telemetry users. All methods are thread-safe // This is an interface for easier testing but all functions will be mirrored at the package level to call // the global client. type Client interface { io.Closer - // Count creates a new metric handle for the given parameters that can be used to submit values. + // Count obtains the metric handle for the given parameters, or creates a new one if none was created just yet. // Tags cannot contain commas. Count(namespace Namespace, name string, tags []string) MetricHandle - // Rate creates a new metric handle for the given parameters that can be used to submit values. + // Rate obtains the metric handle for the given parameters, or creates a new one if none was created just yet. // Tags cannot contain commas. Rate(namespace Namespace, name string, tags []string) MetricHandle - // Gauge creates a new metric handle for the given parameters that can be used to submit values. + // Gauge obtains the metric handle for the given parameters, or creates a new one if none was created just yet. // Tags cannot contain commas. Gauge(namespace Namespace, name string, tags []string) MetricHandle - // Distribution creates a new metric handle for the given parameters that can be used to submit values. + // Distribution obtains the metric handle for the given parameters, or creates a new one if none was created just yet. // Tags cannot contain commas. 
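The comma restriction exists because tags are folded into the handle's map key by sorting and comma-joining them (newMetricKey in metrics.go), so the same tag set always resolves to the same handle regardless of call-site order; the Distribution method that completes the interface below follows the same rule. A minimal sketch of that canonicalization step, standard library only:

package main

import (
	"fmt"
	"sort"
	"strings"
)

// canonicalTags mirrors the key-building step of newMetricKey: the tag order
// chosen by the caller must not produce distinct keys.
func canonicalTags(tags []string) string {
	sort.Strings(tags)
	return strings.Join(tags, ",")
}

func main() {
	a := canonicalTags([]string{"service:web", "env:prod"})
	b := canonicalTags([]string{"env:prod", "service:web"})
	fmt.Println(a == b) // true: both become "env:prod,service:web"
}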
Distribution(namespace Namespace, name string, tags []string) MetricHandle diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 2d8ce19996..5455ad49e4 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -865,7 +865,10 @@ func TestClientFlush(t *testing.T) { handle.Submit(1) rate := handle.(*rate) - rate.intervalStart = rate.intervalStart.Add(-time.Second) // So the rate is not +Infinity because the interval is zero + // So the rate is not +Infinity because the interval is zero + now := rate.intervalStart.Load() + sub := now.Add(-time.Second) + rate.intervalStart.Store(&sub) }, expect: func(t *testing.T, payloads []transport.Payload) { payload := payloads[0] diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index 1c735434aa..ff7b20ff42 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -7,7 +7,6 @@ package newtelemetry import ( "sync" - "sync/atomic" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" @@ -52,16 +51,13 @@ func (d *distributions) Payload() transport.Payload { } type distribution struct { - key metricKey - - newSubmit atomic.Bool - values *internal.RingQueue[float64] + key metricKey + values *internal.RingQueue[float64] } var distrLogLossOnce sync.Once func (d *distribution) Submit(value float64) { - d.newSubmit.Store(true) if !d.values.Enqueue(value) { distrLogLossOnce.Do(func() { log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) @@ -74,7 +70,7 @@ func (d *distribution) Get() float64 { } func (d *distribution) payload() transport.DistributionSeries { - if submit := d.newSubmit.Swap(false); !submit { + if d.values.IsEmpty() { return transport.DistributionSeries{} } diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index c675134005..f65b9d1a1a 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -15,8 +15,6 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" ) -type LogOption func(key *loggerKey, value *loggerValue) - // WithTags returns a LogOption that sets the tags for the telemetry log message. Tags are key-value pairs that are then // serialized into a simple "key:value,key2:value2" format. No quoting or escaping is performed. 
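WithTags, defined just below, combines with the package-level Log call; the tags travel as the plain comma-joined string described above, so tag values themselves must not contain commas. A usage sketch as it could look from inside dd-trace-go; the LogError constant name is an assumption here, since the log level constants are not part of this hunk:

package example

import (
	telemetry "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry"
)

func reportFailure(err error) {
	// Tags are serialized as "product:appsec,phase:init" with no escaping or quoting.
	telemetry.Log(telemetry.LogError, err.Error(),
		telemetry.WithTags([]string{"product:appsec", "phase:init"}))
}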
func WithTags(tags []string) LogOption { diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index cfdf43745b..91ef809b0f 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -10,6 +10,7 @@ import ( "math" "sort" "strings" + "sync" "sync/atomic" "time" @@ -88,7 +89,13 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na case transport.GaugeMetric: handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) case transport.RateMetric: - handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}, intervalStart: time.Now()}) + handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}}) + if !loaded { + // Initialize the interval start for rate metrics + r := handle.(*rate) + now := time.Now() + r.intervalStart.Store(&now) + } default: log.Warn("telemetry: unknown metric type %q", kind) return nil @@ -119,49 +126,84 @@ func (m *metrics) Payload() transport.Payload { return transport.GenerateMetrics{Series: series, SkipAllowlist: m.skipAllowlist} } -type metric struct { - key metricKey +type metricPoint struct { + value float64 + time time.Time +} - // Values set during Submit() - newSubmit atomic.Bool - value atomic.Uint64 // Actually a float64, but we need atomic operations - submitTime atomic.Int64 // Unix timestamp +var metricPointPool = sync.Pool{ + New: func() any { + return &metricPoint{value: math.NaN()} + }, +} + +type metric struct { + key metricKey + ptr atomic.Pointer[metricPoint] + intervalStart atomic.Pointer[time.Time] } func (m *metric) Get() float64 { - return math.Float64frombits(m.value.Load()) + if ptr := m.ptr.Load(); ptr != nil { + return ptr.value + } + + return math.NaN() } func (m *metric) payload() transport.MetricData { - if submit := m.newSubmit.Swap(false); !submit { + point := m.ptr.Swap(nil) + if point == nil { return transport.MetricData{} } - data := transport.MetricData{ + var ( + value = point.value + intervalStart = m.intervalStart.Swap(nil) + intervalSeconds float64 + ) + + // Rate metric only + if intervalStart != nil { + intervalSeconds = time.Since(*intervalStart).Seconds() + if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong + return transport.MetricData{} + } + + value = value / intervalSeconds + } + + return transport.MetricData{ Metric: m.key.name, Namespace: m.key.namespace, Tags: m.key.SplitTags(), Type: m.key.kind, Common: knownmetrics.IsCommonMetric(m.key.namespace, m.key.kind, m.key.name), + Interval: int64(intervalSeconds), Points: [][2]any{ - {m.submitTime.Load(), math.Float64frombits(m.value.Load())}, + {point.time.Unix(), value}, }, } - - return data } type count struct { metric } -func (c *count) Submit(value float64) { - c.newSubmit.Store(true) - c.submitTime.Store(time.Now().Unix()) +func (m *count) Submit(newValue float64) { + newPoint := metricPointPool.Get().(*metricPoint) + newPoint.time = time.Now() for { - oldValue := c.Get() - newValue := oldValue + value - if c.value.CompareAndSwap(math.Float64bits(oldValue), math.Float64bits(newValue)) { + oldPoint := m.ptr.Load() + var oldValue float64 + if oldPoint != nil { + oldValue = oldPoint.value + } + newPoint.value = oldValue + newValue + if m.ptr.CompareAndSwap(oldPoint, newPoint) { + if oldPoint != nil { + metricPointPool.Put(oldPoint) + } return } } @@ -171,36 +213,40 @@ type gauge struct { metric } -func (g *gauge) Submit(value float64) { - g.newSubmit.Store(true) - 
g.submitTime.Store(time.Now().Unix()) - g.value.Store(math.Float64bits(value)) +func (m *gauge) Submit(value float64) { + newPoint := metricPointPool.Get().(*metricPoint) + newPoint.time = time.Now() + newPoint.value = value + for { + oldPoint := m.ptr.Load() + if m.ptr.CompareAndSwap(oldPoint, newPoint) { + if oldPoint != nil { + metricPointPool.Put(oldPoint) + } + return + } + } } type rate struct { metric - intervalStart time.Time } -func (r *rate) Submit(value float64) { - r.newSubmit.Store(true) - r.submitTime.Store(time.Now().Unix()) +func (m *rate) Submit(newValue float64) { + newPoint := metricPointPool.Get().(*metricPoint) + newPoint.time = time.Now() for { - oldValue := r.Get() - newValue := oldValue + value - if r.value.CompareAndSwap(math.Float64bits(oldValue), math.Float64bits(newValue)) { + oldPoint := m.ptr.Load() + var oldValue float64 + if oldPoint != nil { + oldValue = oldPoint.value + } + newPoint.value = oldValue + newValue + if m.ptr.CompareAndSwap(oldPoint, newPoint) { + if oldPoint != nil { + metricPointPool.Put(oldPoint) + } return } } } - -func (r *rate) payload() transport.MetricData { - payload := r.metric.payload() - if payload.Metric == "" { - return payload - } - - payload.Interval = int64(time.Since(r.intervalStart).Seconds()) - payload.Points[0][1] = r.Get() / float64(payload.Interval) - return payload -} From 4fd7f2cae42b7689b94355545612e61e4d05c43c Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 6 Feb 2025 11:16:09 +0100 Subject: [PATCH 49/61] new metric implementation using atomic.Pointer and sync.Pool Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 4 + internal/newtelemetry/client_test.go | 55 ++++---- internal/newtelemetry/globalclient.go | 4 +- internal/newtelemetry/internal/ringbuffer.go | 12 +- internal/newtelemetry/metrics.go | 129 ++++++++++--------- 5 files changed, 114 insertions(+), 90 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 2b703cce75..01ac852efc 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -62,6 +62,9 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client }, metrics: metrics{ skipAllowlist: config.Debug, + pool: sync.Pool{ + New: func() any { return &metricPoint{} }, + }, }, distributions: distributions{ skipAllowlist: config.Debug, @@ -194,6 +197,7 @@ func (c *client) Flush() { return } log.Warn("panic while flushing telemetry data, stopping telemetry: %v", r) + telemetryClientEnabled = false if gc, ok := GlobalClient().(*client); ok && gc == c { SwapClient(nil) } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 5455ad49e4..2b0a25c51e 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -1271,6 +1271,12 @@ func BenchmarkLogs(b *testing.B) { }) } +type noopTransport struct{} + +func (noopTransport) RoundTrip(*http.Request) (*http.Response, error) { + return &http.Response{StatusCode: http.StatusOK}, nil +} + func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { b.ReportAllocs() nbSameLogs := 10 @@ -1285,14 +1291,8 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ - Timeout: 5 * time.Second, - Transport: &testRoundTripper{ - roundTrip: func(_ *http.Request) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - }, + Timeout: 5 * time.Second, + Transport: 
noopTransport{}, }, } @@ -1341,7 +1341,7 @@ func BenchmarkMetrics(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - c.Count(NamespaceTracers, "init_time", nil).Submit(1) + c.Count(NamespaceGeneral, "logs_created", nil).Submit(1) } }) @@ -1352,7 +1352,7 @@ func BenchmarkMetrics(b *testing.B) { defer c.Close() - handle := c.Count(NamespaceTracers, "init_time", nil) + handle := c.Count(NamespaceGeneral, "logs_created", nil) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -1369,7 +1369,7 @@ func BenchmarkMetrics(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - c.Gauge(NamespaceTracers, "init_time", nil).Submit(1) + c.Gauge(NamespaceTracers, "stats_buckets", nil).Submit(1) } }) @@ -1380,7 +1380,7 @@ func BenchmarkMetrics(b *testing.B) { defer c.Close() - handle := c.Gauge(NamespaceTracers, "init_time", nil) + handle := c.Gauge(NamespaceTracers, "stats_buckets", nil) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -1425,7 +1425,7 @@ func BenchmarkMetrics(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - c.Distribution(NamespaceTracers, "init_time", nil).Submit(1) + c.Distribution(NamespaceGeneral, "init_time", nil).Submit(1) } }) @@ -1436,7 +1436,7 @@ func BenchmarkMetrics(b *testing.B) { defer c.Close() - handle := c.Distribution(NamespaceTracers, "init_time", nil) + handle := c.Distribution(NamespaceGeneral, "init_time", nil) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -1446,9 +1446,9 @@ func BenchmarkMetrics(b *testing.B) { } func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { - nbSameMetric := 10 - nbDifferentMetrics := 100 - nbGoroutines := 25 + nbSameMetric := 1000 + nbDifferentMetrics := 50 + nbGoroutines := 50 clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, @@ -1459,14 +1459,8 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { // Empty transport to avoid sending data to the agent HTTPClient: &http.Client{ - Timeout: 5 * time.Second, - Transport: &testRoundTripper{ - roundTrip: func(_ *http.Request) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - }, + Timeout: 5 * time.Second, + Transport: noopTransport{}, }, } @@ -1487,8 +1481,9 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { go func() { defer wg.Done() for j := 0; j < nbDifferentMetrics; j++ { + metricName := "init_time_" + strconv.Itoa(j) for k := 0; k < nbSameMetric; k++ { - c.Count(NamespaceTracers, "init_time", []string{"test:1"}).Submit(1) + c.Count(NamespaceGeneral, metricName, []string{"test:1"}).Submit(1) } } }() @@ -1512,15 +1507,17 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { for x := 0; x < b.N; x++ { var wg sync.WaitGroup - handle := c.Count(NamespaceTracers, "init_time", []string{"test:1"}) + handles := make([]MetricHandle, nbDifferentMetrics) + for i := 0; i < nbDifferentMetrics; i++ { + handles[i] = c.Count(NamespaceGeneral, "init_time_"+strconv.Itoa(i), []string{"test:1"}) + } for i := 0; i < nbGoroutines; i++ { wg.Add(1) go func() { defer wg.Done() for j := 0; j < nbDifferentMetrics; j++ { - for k := 0; k < nbSameMetric; k++ { - handle.Submit(1) + handles[j].Submit(1) } } }() diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 6cb4b678bd..2599ab4c5f 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -224,9 +224,11 @@ func globalClientCall(fun func(client Client)) { fun(*client) } +var noopMetricHandleInstance = noopMetricHandle{} + func globalClientNewMetric(namespace Namespace, kind 
transport.MetricType, name string, tags []string) MetricHandle { if Disabled() { - return noopMetricHandle{} + return noopMetricHandleInstance } maker := func(client Client) MetricHandle { diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 622865cb64..406aed43ac 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -100,6 +100,11 @@ func (rq *RingQueue[T]) Dequeue() T { rq.mu.Lock() defer rq.mu.Unlock() + if rq.count == 0 { + var zero T + return zero + } + ret := rq.buffer[rq.head] // bitwise modulus rq.head = (rq.head + 1) % len(rq.buffer) @@ -140,7 +145,12 @@ func (rq *RingQueue[T]) Flush() []T { // ReleaseBuffer returns the buffer to the pool. func (rq *RingQueue[T]) ReleaseBuffer(buf []T) { - rq.pool.Put(buf[:cap(buf)]) // Make sure nobody reduced the length of the buffer + var zero T + buf = buf[:cap(buf)] // Make sure nobody reduced the length of the buffer + for i := range buf { + buf[i] = zero + } + rq.pool.Put(buf) } // IsEmpty returns true if the buffer is empty. diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 91ef809b0f..1155dff341 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -37,21 +37,21 @@ func (k metricKey) SplitTags() []string { func validateMetricKey(namespace Namespace, kind transport.MetricType, name string, tags []string) error { if len(name) == 0 { - return fmt.Errorf("telemetry: metric name with tags %v should be empty", tags) + return fmt.Errorf("metric name with tags %v should be empty", tags) } if !knownmetrics.IsKnownMetric(namespace, kind, name) { - return fmt.Errorf("telemetry: metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ + return fmt.Errorf("metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+ "The metric will still be sent", name, string(kind), namespace) } for _, tag := range tags { if len(tag) == 0 { - return fmt.Errorf("telemetry: metric %q has should not have empty tags", name) + return fmt.Errorf("metric %q has should not have empty tags", name) } if strings.Contains(tag, ",") { - return fmt.Errorf("telemetry: metric %q tag %q should not contain commas", name, tag) + return fmt.Errorf("metric %q tag %q should not contain commas", name, tag) } } @@ -67,12 +67,13 @@ func newMetricKey(namespace Namespace, kind transport.MetricType, name string, t // metricsHandle is the internal equivalent of MetricHandle for Count/Rate/Gauge metrics that are sent via the payload [transport.GenerateMetrics]. type metricHandle interface { MetricHandle - payload() transport.MetricData + Payload() transport.MetricData } type metrics struct { store internal.TypedSyncMap[metricKey, metricHandle] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics + pool sync.Pool } // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. 
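LoadOrStore, whose hunk continues below, builds the handle optimistically but only validates and logs when loaded is false, so the warning fires once for the caller that actually created the entry and never for later lookups. A condensed, self-contained sketch of that pattern using a plain sync.Map and a stand-in validator instead of the real metricKey plumbing:

package main

import (
	"fmt"
	"sync"
)

var store sync.Map

func loadOrStore(key string, validate func(string) error) *int {
	v, loaded := store.LoadOrStore(key, new(int))
	if !loaded {
		// Only the call that created the entry pays the validation cost
		// (and emits the warning); later lookups skip it entirely.
		if err := validate(key); err != nil {
			fmt.Println("warn:", err)
		}
	}
	return v.(*int)
}

func main() {
	validate := func(k string) error { fmt.Println("validating", k); return nil }
	loadOrStore("tracers/count/spans_created", validate) // validates once
	loadOrStore("tracers/count/spans_created", validate) // cached, no validation
}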
@@ -85,11 +86,11 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na ) switch kind { case transport.CountMetric: - handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key}}) + handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key, pool: &m.pool}}) case transport.GaugeMetric: - handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key}}) + handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key, pool: &m.pool}}) case transport.RateMetric: - handle, loaded = m.store.LoadOrStore(key, &rate{metric: metric{key: key}}) + handle, loaded = m.store.LoadOrStore(key, &rate{count: count{metric: metric{key: key, pool: &m.pool}}}) if !loaded { // Initialize the interval start for rate metrics r := handle.(*rate) @@ -113,7 +114,7 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na func (m *metrics) Payload() transport.Payload { series := make([]transport.MetricData, 0, m.store.Len()) m.store.Range(func(_ metricKey, handle metricHandle) bool { - if payload := handle.payload(); payload.Metric != "" { + if payload := handle.Payload(); payload.Type != "" { series = append(series, payload) } return true @@ -131,16 +132,11 @@ type metricPoint struct { time time.Time } -var metricPointPool = sync.Pool{ - New: func() any { - return &metricPoint{value: math.NaN()} - }, -} - +// metric is a meta t type metric struct { - key metricKey - ptr atomic.Pointer[metricPoint] - intervalStart atomic.Pointer[time.Time] + key metricKey + ptr atomic.Pointer[metricPoint] + pool *sync.Pool } func (m *metric) Get() float64 { @@ -151,26 +147,18 @@ func (m *metric) Get() float64 { return math.NaN() } -func (m *metric) payload() transport.MetricData { +func (m *metric) Payload() transport.MetricData { point := m.ptr.Swap(nil) if point == nil { return transport.MetricData{} } + defer m.pool.Put(point) + return m.payload(point) +} - var ( - value = point.value - intervalStart = m.intervalStart.Swap(nil) - intervalSeconds float64 - ) - - // Rate metric only - if intervalStart != nil { - intervalSeconds = time.Since(*intervalStart).Seconds() - if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong - return transport.MetricData{} - } - - value = value / intervalSeconds +func (m *metric) payload(point *metricPoint) transport.MetricData { + if point == nil { + return transport.MetricData{} } return transport.MetricData{ @@ -179,19 +167,19 @@ func (m *metric) payload() transport.MetricData { Tags: m.key.SplitTags(), Type: m.key.kind, Common: knownmetrics.IsCommonMetric(m.key.namespace, m.key.kind, m.key.name), - Interval: int64(intervalSeconds), Points: [][2]any{ - {point.time.Unix(), value}, + {point.time.Unix(), point.value}, }, } } +// count is a metric that represents a single value that is incremented over time and flush and reset at zero when flushed type count struct { metric } func (m *count) Submit(newValue float64) { - newPoint := metricPointPool.Get().(*metricPoint) + newPoint := m.pool.Get().(*metricPoint) newPoint.time = time.Now() for { oldPoint := m.ptr.Load() @@ -202,51 +190,74 @@ func (m *count) Submit(newValue float64) { newPoint.value = oldValue + newValue if m.ptr.CompareAndSwap(oldPoint, newPoint) { if oldPoint != nil { - metricPointPool.Put(oldPoint) + m.pool.Put(oldPoint) } return } } } +// gauge is a metric that represents a single value at a point in time that is not incremental type gauge struct { metric } -func (m 
*gauge) Submit(value float64) { - newPoint := metricPointPool.Get().(*metricPoint) +func (g *gauge) Submit(value float64) { + newPoint := g.pool.Get().(*metricPoint) newPoint.time = time.Now() newPoint.value = value for { - oldPoint := m.ptr.Load() - if m.ptr.CompareAndSwap(oldPoint, newPoint) { + oldPoint := g.ptr.Load() + if g.ptr.CompareAndSwap(oldPoint, newPoint) { if oldPoint != nil { - metricPointPool.Put(oldPoint) + g.pool.Put(oldPoint) } return } } } +// rate is like a count metric but the value sent is divided by an interval of time that is also sent/ type rate struct { - metric + count + intervalStart atomic.Pointer[time.Time] } -func (m *rate) Submit(newValue float64) { - newPoint := metricPointPool.Get().(*metricPoint) - newPoint.time = time.Now() - for { - oldPoint := m.ptr.Load() - var oldValue float64 - if oldPoint != nil { - oldValue = oldPoint.value - } - newPoint.value = oldValue + newValue - if m.ptr.CompareAndSwap(oldPoint, newPoint) { - if oldPoint != nil { - metricPointPool.Put(oldPoint) - } - return - } +func (r *rate) Get() float64 { + sum := r.count.Get() + intervalStart := r.intervalStart.Load() + if intervalStart == nil { + return math.NaN() } + + intervalSeconds := time.Since(*intervalStart).Seconds() + if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong + return math.NaN() + } + + return sum / intervalSeconds +} + +func (r *rate) Payload() transport.MetricData { + now := time.Now() + intervalStart := r.intervalStart.Swap(&now) + if intervalStart == nil { + return transport.MetricData{} + } + + intervalSeconds := time.Since(*intervalStart).Seconds() + if int64(intervalSeconds) == 0 { // Interval for rate is too small, we prefer not sending data over sending something wrong + return transport.MetricData{} + } + + point := r.ptr.Swap(nil) + if point == nil { + return transport.MetricData{} + } + defer r.pool.Put(point) + + point.value /= intervalSeconds + payload := r.metric.payload(point) + payload.Interval = int64(intervalSeconds) + return payload } From a6a0bdb9690da0a8a7a53a2435da05eba2251f91 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 6 Feb 2025 11:29:15 +0100 Subject: [PATCH 50/61] removed mapper.wrapper type Signed-off-by: Eliott Bouhana --- .../internal/mapper/app_closing.go | 8 ++++---- .../internal/mapper/app_started.go | 8 ++++---- .../newtelemetry/internal/mapper/wrapper.go | 18 ------------------ 3 files changed, 8 insertions(+), 26 deletions(-) delete mode 100644 internal/newtelemetry/internal/mapper/wrapper.go diff --git a/internal/newtelemetry/internal/mapper/app_closing.go b/internal/newtelemetry/internal/mapper/app_closing.go index c3d1fd6b14..aeba9c7adc 100644 --- a/internal/newtelemetry/internal/mapper/app_closing.go +++ b/internal/newtelemetry/internal/mapper/app_closing.go @@ -10,14 +10,14 @@ import ( ) // NewAppClosingMapper returns a new Mapper that appends an AppClosing payload to the given payloads and calls the underlying Mapper with it. 
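The mappers compose by delegation: each enricher adjusts the payload slice and hands it to the next Mapper, and Transform also returns the Mapper to use for the following flush, which is how one-shot payloads such as app-started fall out of the chain after their first run. NewAppClosingMapper follows right after this sketch; the types here are simplified stand-ins for the real Mapper interface and transport payloads:

package main

import "fmt"

type payload string

// mapper mirrors the shape of the internal Mapper interface: it returns the
// transformed payloads plus the mapper to use on the next call.
type mapper interface {
	Transform([]payload) ([]payload, mapper)
}

type identity struct{}

func (i identity) Transform(p []payload) ([]payload, mapper) { return p, i }

// closingEnricher appends an "app-closing" payload and delegates to next,
// much like appClosingEnricher does with transport.AppClosing.
type closingEnricher struct{ next mapper }

func (c closingEnricher) Transform(p []payload) ([]payload, mapper) {
	return c.next.Transform(append(p, "app-closing"))
}

func main() {
	m := closingEnricher{next: identity{}}
	out, _ := m.Transform([]payload{"metrics"})
	fmt.Println(out) // [metrics app-closing]
}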
-func NewAppClosingMapper(underlying Mapper) Mapper {
-	return &appClosingEnricher{wrapper{underlying}}
+func NewAppClosingMapper(next Mapper) Mapper {
+	return &appClosingEnricher{next: next}
 }
 
 type appClosingEnricher struct {
-	wrapper
+	next Mapper
 }
 
 func (t *appClosingEnricher) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) {
-	return t.wrapper.Transform(append(payloads, transport.AppClosing{}))
+	return t.next.Transform(append(payloads, transport.AppClosing{}))
 }
diff --git a/internal/newtelemetry/internal/mapper/app_started.go b/internal/newtelemetry/internal/mapper/app_started.go
index 5e10831aa7..f281672a17 100644
--- a/internal/newtelemetry/internal/mapper/app_started.go
+++ b/internal/newtelemetry/internal/mapper/app_started.go
@@ -11,14 +11,14 @@ import (
 )
 
 type appStartedReducer struct {
-	wrapper
+	next Mapper
 }
 
 // NewAppStartedMapper returns a new Mapper that adds an AppStarted payload to the beginning of all payloads
 // and passes it down to its underlying mapper.
 // The AppStarted payload ingests the [transport.AppClientConfigurationChange] and [transport.AppProductChange] payloads
-func NewAppStartedMapper(underlying Mapper) Mapper {
-	return &appStartedReducer{wrapper{underlying}}
+func NewAppStartedMapper(next Mapper) Mapper {
+	return &appStartedReducer{next: next}
 }
 
 func (t *appStartedReducer) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) {
@@ -43,6 +43,6 @@
 	}
 
 	// The app-started event should be the first event in the payload and not in a message-batch
-	payloads, mapper := t.wrapper.Transform(payloadLefts)
+	payloads, mapper := t.next.Transform(payloadLefts)
 	return append([]transport.Payload{appStarted}, payloads...), mapper
 }
diff --git a/internal/newtelemetry/internal/mapper/wrapper.go b/internal/newtelemetry/internal/mapper/wrapper.go
deleted file mode 100644
index c2c52901a3..0000000000
--- a/internal/newtelemetry/internal/mapper/wrapper.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Unless explicitly stated otherwise all files in this repository are licensed
-// under the Apache License Version 2.0.
-// This product includes software developed at Datadog (https://www.datadoghq.com/).
-// Copyright 2025 Datadog, Inc.
- -package mapper - -import ( - "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" -) - -type wrapper struct { - underlying Mapper -} - -func (w wrapper) Transform(payloads []transport.Payload) ([]transport.Payload, Mapper) { - return w.underlying.Transform(payloads) -} From c4080ec434e8be1077248226ef4f81b5b93df639 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 6 Feb 2025 11:30:09 +0100 Subject: [PATCH 51/61] simplify knownmetrics generator Signed-off-by: Eliott Bouhana --- .../internal/knownmetrics/generator/generator.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/newtelemetry/internal/knownmetrics/generator/generator.go b/internal/newtelemetry/internal/knownmetrics/generator/generator.go index a343a52a31..1cdf13ff38 100644 --- a/internal/newtelemetry/internal/knownmetrics/generator/generator.go +++ b/internal/newtelemetry/internal/knownmetrics/generator/generator.go @@ -11,7 +11,6 @@ import ( "encoding/json" "flag" "fmt" - "io" "net/http" "os" "os/exec" @@ -32,7 +31,7 @@ const ( ) func base64Decode(encoded string) string { - decoded, _ := io.ReadAll(base64.NewDecoder(base64.StdEncoding, strings.NewReader(encoded))) + decoded, _ := base64.StdEncoding.DecodeString(encoded) return string(decoded) } @@ -78,7 +77,7 @@ func downloadFromDdgo(remoteURL, localPath, branch, token string, getMetricNames return strings.Compare(i.Name, j.Name) }) - fp, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + fp, err := os.Create(localPath) if err != nil { return err } From 07591374212ae0e4e095a7fd1f16af5ea92a772e Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 6 Feb 2025 11:50:38 +0100 Subject: [PATCH 52/61] added a new TypedSyncPool for use in the RingQueue to aggregate usage of multiple sync.pool + suggestions Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 5 ++- internal/newtelemetry/distributions.go | 6 +++- internal/newtelemetry/internal/recorder.go | 13 ++++--- internal/newtelemetry/internal/ringbuffer.go | 17 +++++++--- .../newtelemetry/internal/transport/origin.go | 8 ++--- .../newtelemetry/internal/typedsyncpool.go | 34 +++++++++++++++++++ internal/newtelemetry/metrics.go | 15 ++++---- 7 files changed, 73 insertions(+), 25 deletions(-) create mode 100644 internal/newtelemetry/internal/typedsyncpool.go diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 01ac852efc..4afa1aefac 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -62,13 +62,12 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client }, metrics: metrics{ skipAllowlist: config.Debug, - pool: sync.Pool{ - New: func() any { return &metricPoint{} }, - }, + pool: internal.NewSyncPool(func() *metricPoint { return &metricPoint{} }), }, distributions: distributions{ skipAllowlist: config.Debug, queueSize: config.DistributionsSize, + pool: internal.NewSyncPool(func() []float64 { return make([]float64, config.DistributionsSize.Min) }), }, } diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index ff7b20ff42..da4b4a96d7 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -16,6 +16,7 @@ import ( type distributions struct { store internal.TypedSyncMap[metricKey, *distribution] + pool *internal.SyncPool[[]float64] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics queueSize Range[int] } @@ -24,7 +25,10 @@ type 
distributions struct { func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []string) MetricHandle { kind := transport.DistMetric key := newMetricKey(namespace, kind, name, tags) - handle, loaded := d.store.LoadOrStore(key, &distribution{key: key, values: internal.NewRingQueue[float64](d.queueSize.Min, d.queueSize.Max)}) + handle, loaded := d.store.LoadOrStore(key, &distribution{ + key: key, + values: internal.NewRingQueueWithPool[float64](d.queueSize.Min, d.queueSize.Max, d.pool), + }) if !loaded { // The metric is new: validate and log issues about it if err := validateMetricKey(namespace, kind, name, tags); err != nil { log.Warn("telemetry: %v", err) diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index 672bac1ecd..d993ae9758 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -11,16 +11,21 @@ type Recorder[T any] struct { queue *RingQueue[func(T)] } +// TODO: tweak these values once we get telemetry data from the telemetry client +const ( + minQueueCap = 16 // Initial queue capacity + maxQueueCap = 512 // Maximum queue capacity +) + // NewRecorder creates a new [Recorder] instance. with 512 as the maximum number of recorded functions before overflowing. func NewRecorder[T any]() Recorder[T] { return Recorder[T]{ - // TODO: tweak this value once we get telemetry data from the telemetry client - queue: NewRingQueue[func(T)](16, 512), + queue: NewRingQueue[func(T)](minQueueCap, maxQueueCap), } } -// Record takes a function and records it in the Recorder's queue. If the queue is full, it returns false. -// Once Replay is called, all recorded functions will be replayed with object T as an argument in order of recording. +// Record takes a function and records it in the [Recorder]'s queue. If the queue is full, it returns false. +// Once [Recorder.Replay] is called, all recorded functions will be replayed with object T as an argument in order of recording. func (r Recorder[T]) Record(f func(T)) bool { if r.queue == nil { return true diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 406aed43ac..02cefac403 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -16,7 +16,7 @@ type RingQueue[T any] struct { // mu is the lock for the buffer, head and tail. mu sync.Mutex // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. - pool sync.Pool + pool *SyncPool[[]T] maxBufferSize int } @@ -25,9 +25,16 @@ func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { return &RingQueue[T]{ buffer: make([]T, minSize), maxBufferSize: maxSize, - pool: sync.Pool{ - New: func() any { return make([]T, minSize) }, - }, + pool: NewSyncPool[[]T](func() []T { return make([]T, minSize) }), + } +} + +// NewRingQueueWithPool creates a new RingQueue with a minimum size, a maximum size and a pool. 
Make sure the pool is properly initialized with the right type +func NewRingQueueWithPool[T any](minSize, maxSize int, pool *SyncPool[[]T]) *RingQueue[T] { + return &RingQueue[T]{ + buffer: make([]T, minSize), + maxBufferSize: maxSize, + pool: pool, } } @@ -121,7 +128,7 @@ func (rq *RingQueue[T]) GetBuffer() []T { func (rq *RingQueue[T]) getBufferLocked() []T { prevBuf := rq.buffer - rq.buffer = rq.pool.Get().([]T) + rq.buffer = rq.pool.Get() rq.head, rq.tail, rq.count = 0, 0, 0 return prevBuf } diff --git a/internal/newtelemetry/internal/transport/origin.go b/internal/newtelemetry/internal/transport/origin.go index b12ee41d72..01c30f541e 100644 --- a/internal/newtelemetry/internal/transport/origin.go +++ b/internal/newtelemetry/internal/transport/origin.go @@ -10,8 +10,8 @@ type Origin string const ( OriginDefault Origin = "default" - OriginCode = "code" - OriginDDConfig = "dd_config" - OriginEnvVar = "env_var" - OriginRemoteConfig = "remote_config" + OriginCode Origin = "code" + OriginDDConfig Origin = "dd_config" + OriginEnvVar Origin = "env_var" + OriginRemoteConfig Origin = "remote_config" ) diff --git a/internal/newtelemetry/internal/typedsyncpool.go b/internal/newtelemetry/internal/typedsyncpool.go new file mode 100644 index 0000000000..69b45c5f7c --- /dev/null +++ b/internal/newtelemetry/internal/typedsyncpool.go @@ -0,0 +1,34 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "sync" +) + +// SyncPool is a wrapper around sync.Pool that provides type safety. +type SyncPool[T any] struct { + pool *sync.Pool +} + +// NewSyncPool creates a new SyncPool with the given new function. +func NewSyncPool[T any](new func() T) *SyncPool[T] { + return &SyncPool[T]{ + pool: &sync.Pool{ + New: func() any { + return new() + }, + }, + } +} + +func (sp *SyncPool[T]) Get() T { + return sp.pool.Get().(T) +} + +func (sp *SyncPool[T]) Put(v T) { + sp.pool.Put(v) +} diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 1155dff341..433237c9fb 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -10,7 +10,6 @@ import ( "math" "sort" "strings" - "sync" "sync/atomic" "time" @@ -72,8 +71,8 @@ type metricHandle interface { type metrics struct { store internal.TypedSyncMap[metricKey, metricHandle] + pool *internal.SyncPool[*metricPoint] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics - pool sync.Pool } // LoadOrStore returns a MetricHandle for the given metric key. If the metric key does not exist, it will be created. 
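For readers unfamiliar with the pattern: the SyncPool wrapper introduced above only hides the interface{} round-trip that a raw sync.Pool forces on callers. A minimal sketch of how it is meant to be used, written as a throwaway test inside the internal package with a made-up point struct (the real pool stores the package's unexported metricPoint values), could look like this; it is illustrative only and not part of this patch:

package internal

import (
	"testing"
	"time"
)

// point is a hypothetical stand-in for the unexported metricPoint type.
type point struct {
	value float64
	time  time.Time
}

func TestSyncPoolSketch(t *testing.T) {
	// New is only called when the pool is empty; Get returns a typed *point,
	// so no .(T) assertion is needed, unlike with a raw sync.Pool.
	pool := NewSyncPool(func() *point { return &point{} })

	p := pool.Get()
	p.value, p.time = 1.0, time.Now()

	// Hand the value back for reuse once it has been consumed.
	pool.Put(p)
	if pool.Get() == nil {
		t.Fatal("expected a value from the pool")
	}
}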
@@ -86,11 +85,11 @@ func (m *metrics) LoadOrStore(namespace Namespace, kind transport.MetricType, na ) switch kind { case transport.CountMetric: - handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key, pool: &m.pool}}) + handle, loaded = m.store.LoadOrStore(key, &count{metric: metric{key: key, pool: m.pool}}) case transport.GaugeMetric: - handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key, pool: &m.pool}}) + handle, loaded = m.store.LoadOrStore(key, &gauge{metric: metric{key: key, pool: m.pool}}) case transport.RateMetric: - handle, loaded = m.store.LoadOrStore(key, &rate{count: count{metric: metric{key: key, pool: &m.pool}}}) + handle, loaded = m.store.LoadOrStore(key, &rate{count: count{metric: metric{key: key, pool: m.pool}}}) if !loaded { // Initialize the interval start for rate metrics r := handle.(*rate) @@ -136,7 +135,7 @@ type metricPoint struct { type metric struct { key metricKey ptr atomic.Pointer[metricPoint] - pool *sync.Pool + pool *internal.SyncPool[*metricPoint] } func (m *metric) Get() float64 { @@ -179,7 +178,7 @@ type count struct { } func (m *count) Submit(newValue float64) { - newPoint := m.pool.Get().(*metricPoint) + newPoint := m.pool.Get() newPoint.time = time.Now() for { oldPoint := m.ptr.Load() @@ -203,7 +202,7 @@ type gauge struct { } func (g *gauge) Submit(value float64) { - newPoint := g.pool.Get().(*metricPoint) + newPoint := g.pool.Get() newPoint.time = time.Now() newPoint.value = value for { From a5b0d0cfd63f79aa7a12a96d8660e2a8e8f6ebeb Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Thu, 6 Feb 2025 16:32:13 +0100 Subject: [PATCH 53/61] apply last suggestions from @RomainMuller Signed-off-by: Eliott Bouhana --- internal/newtelemetry/api.go | 16 +++- internal/newtelemetry/client.go | 82 ++++++++--------- internal/newtelemetry/client_config.go | 87 ++++++++++--------- internal/newtelemetry/client_test.go | 10 +-- internal/newtelemetry/configuration.go | 1 + internal/newtelemetry/dependencies.go | 23 ++--- internal/newtelemetry/distributions.go | 12 +-- internal/newtelemetry/globalclient.go | 42 ++++----- internal/newtelemetry/integration.go | 20 ++--- internal/newtelemetry/internal/range.go | 31 +++++++ internal/newtelemetry/internal/recorder.go | 12 +-- internal/newtelemetry/internal/ringbuffer.go | 74 +++++++++------- .../newtelemetry/internal/ringbuffer_test.go | 48 ++++++---- .../internal/{typedsyncmap.go => syncmap.go} | 23 +++-- .../{typedsyncpool.go => syncpool.go} | 4 +- internal/newtelemetry/internal/ticker.go | 44 ++++++---- internal/newtelemetry/internal/writer.go | 17 ++-- internal/newtelemetry/log/log.go | 16 ++-- internal/newtelemetry/logger.go | 12 +-- internal/newtelemetry/metrics.go | 2 +- 20 files changed, 327 insertions(+), 249 deletions(-) create mode 100644 internal/newtelemetry/internal/range.go rename internal/newtelemetry/internal/{typedsyncmap.go => syncmap.go} (56%) rename internal/newtelemetry/internal/{typedsyncpool.go => syncpool.go} (81%) diff --git a/internal/newtelemetry/api.go b/internal/newtelemetry/api.go index fb24b1db09..41cba3e6c7 100644 --- a/internal/newtelemetry/api.go +++ b/internal/newtelemetry/api.go @@ -15,7 +15,7 @@ import ( // different products used by the same application type Namespace = transport.Namespace -// Goland is having a hard time with the following const block, it keeps deleting the type of the consts +// Goland is having a hard time with the following const block, it keeps deleting the type // //goland:noinspection GoVarAndConstTypeMayBeOmitted const ( @@ 
-32,7 +32,7 @@ const ( // Origin describes the source of a configuration change type Origin = transport.Origin -// Goland is having a hard time with the following const block, it keeps deleting the type of the consts +// Goland is having a hard time with the following const block, it keeps deleting the type // //goland:noinspection GoVarAndConstTypeMayBeOmitted const ( @@ -43,6 +43,18 @@ const ( OriginRemoteConfig Origin = transport.OriginRemoteConfig ) +// LogLevel describes the level of a log message +type LogLevel = transport.LogLevel + +// Goland is having a hard time with the following const block, it keeps deleting the type +// +//goland:noinspection GoVarAndConstTypeMayBeOmitted +const ( + LogDebug LogLevel = transport.LogLevelDebug + LogWarn LogLevel = transport.LogLevelWarn + LogError LogLevel = transport.LogLevelError +) + // MetricHandle can be used to submit different values for the same metric. // MetricHandle is used to reduce lock contention when submitting metrics. // This can also be used ephemerally to submit a single metric value like this: diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 4afa1aefac..f10bce9812 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -55,7 +55,7 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client writer: writer, clientConfig: config, flushMapper: mapper.NewDefaultMapper(config.HeartbeatInterval, config.ExtendedHeartbeatInterval), - payloadQueue: internal.NewRingQueue[transport.Payload](config.PayloadQueueSize.Min, config.PayloadQueueSize.Max), + payloadQueue: internal.NewRingQueue[transport.Payload](config.PayloadQueueSize), dependencies: dependencies{ DependencyLoader: config.DependencyLoader, @@ -86,19 +86,22 @@ func newClient(tracerConfig internal.TracerConfig, config ClientConfig) (*client client.dataSources = append(client.dataSources, &client.metrics, &client.distributions) } - client.flushTicker = internal.NewTicker(func() { - client.Flush() - }, config.FlushInterval.Min, config.FlushInterval.Max) + client.flushTicker = internal.NewTicker(client.Flush, config.FlushInterval) return client, nil } +// dataSources is where the data that will be flushed is coming from. I.e metrics, logs, configurations, etc. +type dataSource interface { + Payload() transport.Payload +} + type client struct { tracerConfig internal.TracerConfig - writer internal.Writer clientConfig ClientConfig // Data sources + dataSources []dataSource integrations integrations products products configuration configuration @@ -106,17 +109,18 @@ type client struct { logger logger metrics metrics distributions distributions - dataSources []interface { - Payload() transport.Payload - } - - flushTicker *internal.Ticker // flushMapper is the transformer to use for the next flush on the gathered bodies on this tick flushMapper mapper.Mapper flushMapperMu sync.Mutex - // payloadQueue is used when we cannot flush previously built bodies + // flushTicker is the ticker that triggers a call to client.Flush every flush interval + flushTicker *internal.Ticker + + // writer is the writer to use to send the payloads to the backend or the agent + writer internal.Writer + + // payloadQueue is used when we cannot flush previously built payload for multiple reasons. 
payloadQueue *internal.RingQueue[transport.Payload] } @@ -196,7 +200,7 @@ func (c *client) Flush() { return } log.Warn("panic while flushing telemetry data, stopping telemetry: %v", r) - telemetryClientEnabled = false + telemetryClientDisabled = true if gc, ok := GlobalClient().(*client); ok && gc == c { SwapClient(nil) } @@ -219,8 +223,8 @@ func (c *client) transform(payloads []transport.Payload) []transport.Payload { return payloads } -// flush sends all the data sources to the writer by let them flow through the given transformer function. -// The transformer function is used to transform the bodies before sending them to the writer. +// flush sends all the data sources to the writer after having sent them through the [transform] function. +// It returns the amount of bytes sent to the writer. func (c *client) flush(payloads []transport.Payload) (int, error) { payloads = c.transform(payloads) @@ -228,12 +232,7 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { return 0, nil } - if c.payloadQueue.IsEmpty() { - c.flushTicker.CanDecreaseSpeed() - } else if c.payloadQueue.IsFull() { - c.flushTicker.CanIncreaseSpeed() - } - + emptyQueue := c.payloadQueue.IsEmpty() // We enqueue the new payloads to preserve the order of the payloads c.payloadQueue.Enqueue(payloads...) payloads = c.payloadQueue.Flush() @@ -249,7 +248,11 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { c.computeFlushMetrics(results, err) if err != nil { // We stop flushing when we encounter a fatal error, put the bodies in the queue and return the error - log.Error("error while flushing telemetry data: %v", err) + if results[len(results)-1].StatusCode == 413 { // If the payload is too large we have no way to divide it, we can only skip it... + log.Warn("telemetry: tried sending a payload that was too large, dropping it") + continue + } + log.Warn("error while flushing telemetry data: %v", err) c.payloadQueue.Enqueue(payloads[i:]...) return nbBytes, err } @@ -266,16 +269,16 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { nbBytes += successfulCall.PayloadByteSize } + if emptyQueue && !speedIncreased { // If we did not send a very big payload, and we have no payloads + c.flushTicker.CanDecreaseSpeed() + } + if len(failedCalls) > 0 { - errName := "error" - if len(failedCalls) > 1 { - errName = "errors" - } var errs []error for _, call := range failedCalls { errs = append(errs, call.Error) } - log.Debug("non-fatal %s while flushing telemetry data: %v", errName, errors.Join(errs...)) + log.Debug("non-fatal error(s) while flushing telemetry data: %v", errors.Join(errs...)) } return nbBytes, nil @@ -283,45 +286,46 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { // computeFlushMetrics computes and submits the metrics for the flush operation using the output from the writer.Flush method. // It will submit the number of requests, responses, errors, the number of bytes sent and the duration of the call that was successful. 
-func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, err error) { - if !c.clientConfig.InternalMetricsEnabled { +func (c *client) computeFlushMetrics(results []internal.EndpointRequestResult, reason error) { + if !c.clientConfig.internalMetricsEnabled { return } indexToEndpoint := func(i int) string { - if i == 0 { + if i == 0 && c.clientConfig.AgentURL != "" { return "agent" } return "agentless" } for i, result := range results { - c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", []string{"endpoint:" + indexToEndpoint(i)}).Submit(1) + endpoint := "endpoint:" + indexToEndpoint(i) + c.Count(transport.NamespaceTelemetry, "telemetry_api.requests", []string{endpoint}).Submit(1) if result.StatusCode != 0 { - c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", []string{"endpoint:" + indexToEndpoint(i), "status_code:" + strconv.Itoa(result.StatusCode)}).Submit(1) + c.Count(transport.NamespaceTelemetry, "telemetry_api.responses", []string{endpoint, "status_code:" + strconv.Itoa(result.StatusCode)}).Submit(1) } if result.Error != nil { - typ := "network" + typ := "type:network" if os.IsTimeout(result.Error) { - typ = "timeout" + typ = "type:timeout" } var writerStatusCodeError *internal.WriterStatusCodeError if errors.As(result.Error, &writerStatusCodeError) { - typ = "status_code" + typ = "type:status_code" } - c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", []string{"endpoint:" + indexToEndpoint(i), "type:" + typ}).Submit(1) + c.Count(transport.NamespaceTelemetry, "telemetry_api.errors", []string{endpoint, typ}).Submit(1) } } - if err != nil { + if reason != nil { return } successfulCall := results[len(results)-1] - endpoint := indexToEndpoint(len(results) - 1) - c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", []string{"endpoint:" + endpoint}).Submit(float64(successfulCall.PayloadByteSize)) - c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", []string{"endpoint:" + endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) + endpoint := "endpoint:" + indexToEndpoint(len(results)-1) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.bytes", []string{endpoint}).Submit(float64(successfulCall.PayloadByteSize)) + c.Distribution(transport.NamespaceTelemetry, "telemetry_api.ms", []string{endpoint}).Submit(float64(successfulCall.CallDuration.Milliseconds())) } func (c *client) AppStart() { diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index 2dde65f215..f4d6751c0a 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -17,30 +17,22 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" ) -type Range[T any] struct { - Min T - Max T -} - type ClientConfig struct { // DependencyLoader determines how dependency data is sent via telemetry. // If nil, the library should not send the app-dependencies-loaded event. // The default value is [debug.ReadBuildInfo] since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product - // This can be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED + // This can only be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED DependencyLoader func() (*debug.BuildInfo, bool) // MetricsEnabled determines whether metrics are sent via telemetry. // If false, libraries should not send the generate-metrics or distributions events. 
- // This can be controlled via the env var DD_TELEMETRY_METRICS_ENABLED + // This can only be controlled via the env var DD_TELEMETRY_METRICS_ENABLED MetricsEnabled bool // LogsEnabled determines whether logs are sent via telemetry. - // This can be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED + // This can only be controlled via the env var DD_TELEMETRY_LOG_COLLECTION_ENABLED LogsEnabled bool - // InternalMetricsEnabled determines whether client stats metrics are sent via telemetry. - InternalMetricsEnabled bool - // AgentlessURL is the full URL to the agentless telemetry endpoint. (optional) // Defaults to https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry AgentlessURL string @@ -61,16 +53,16 @@ type ClientConfig struct { // FlushInterval is the interval at which the client flushes the data. // By default, the client will start to Flush at 60s intervals and will reduce the interval based on the load till it hit 15s - // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. - FlushInterval Range[time.Duration] + // Both values cannot be higher than 60s because the heartbeat need to be sent at least every 60s. Values will be clamped otherwise. + FlushInterval internal.Range[time.Duration] - // PayloadQueueSize is the size of the payload queue. - PayloadQueueSize Range[int] + // PayloadQueueSize is the size of the payload queue. Default range is [4, 32]. + PayloadQueueSize internal.Range[int] - // DistributionsSize is the size of the distribution queue. - DistributionsSize Range[int] + // DistributionsSize is the size of the distribution queue. Default range is [2^8, 2^14]. + DistributionsSize internal.Range[int] - // Debug enables debug mode for the telemetry clientt and sent it to the backend so it logs the request + // Debug enables debug mode for the telemetry client and sent it to the backend so it logs the request Debug bool // APIKey is the API key to use for sending telemetry to the agentless endpoint. (using DD_API_KEY env var by default) @@ -80,6 +72,9 @@ type ClientConfig struct { // This is necessary because backend won't allow bodies larger than 5MB. // The default value here will be 2MB to take into account the large inaccuracy in estimating the size of bodies EarlyFlushPayloadSize int + + // internalMetricsEnabled determines whether client stats metrics are sent via telemetry. Default to true. + internalMetricsEnabled bool } var ( @@ -88,29 +83,37 @@ var ( agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. - defaultHeartbeatInterval = 60 // seconds + defaultHeartbeatInterval = time.Minute // seconds // defaultExtendedHeartbeatInterval is the default interval at which the agent sends an extended heartbeat. defaultExtendedHeartbeatInterval = 24 * time.Hour // defaultMinFlushInterval is the default interval at which the client flushes the data. - defaultMinFlushInterval = 15.0 * time.Second + defaultFlushIntervalRange = internal.Range[time.Duration]{ + Min: 15 * time.Second, + Max: 60 * time.Second, + } - // defaultMaxFlushInterval is the default interval at which the client flushes the data. 
- defaultMaxFlushInterval = 60.0 * time.Second + defaultAuthorizedHearbeatRange = internal.Range[time.Duration]{ + Min: time.Microsecond, + Max: time.Minute, + } agentProxyAPIPath = "/telemetry/proxy/api/v2/apmtelemetry" defaultEarlyFlushPayloadSize = 2 * 1024 * 1024 // 2MB - // maxPayloadSize is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. - maxPayloadSize = 5 * 1024 * 1024 // 5MB + // authorizedPayloadSize.Max is specified by the backend to be 5MB. The goal is to never reach this value otherwise our data will be silently dropped. + authorizedPayloadSize = internal.Range[int]{ + Min: 0, + Max: 5 * 1024 * 1024, // 5MB + } // TODO: tweak this value once we get real telemetry data from the telemetry client // This means that, by default, we incur dataloss if we spend ~30mins without flushing, considering we send telemetry data this looks reasonable. // This also means that in the worst case scenario, memory-wise, the app is stabilized after running for 30mins. // Ideally both values should be power of 2 because of the way the ring queue is implemented as it's growing - defaultPayloadQueueSize = Range[int]{ + defaultPayloadQueueSize = internal.Range[int]{ Min: 4, Max: 32, } @@ -120,31 +123,26 @@ var ( // Considering we add a point per user request on a simple http server, we would be losing data after 2^14 requests per minute or about 280 requests per second or under 3ms per request. // If this throughput is constant, the telemetry client flush ticker speed will increase to, at best, double twice to flush 15 seconds of data each time. // Which will bring our max throughput to 1100 points per second or about 750µs per request. - distributionsSize = Range[int]{ + distributionsSize = internal.Range[int]{ Min: 1 << 8, Max: 1 << 14, } ) -// clamp squeezes a value between a minimum and maximum value. 
-func clamp[T ~int64](value, minVal, maxVal T) T { - return max(min(maxVal, value), minVal) -} - func (config ClientConfig) validateConfig() error { - if config.HeartbeatInterval > 60*time.Second { + if config.HeartbeatInterval > time.Minute { return fmt.Errorf("HeartbeatInterval cannot be higher than 60s, got %v", config.HeartbeatInterval) } - if config.FlushInterval.Min > 60*time.Second || config.FlushInterval.Max > 60*time.Second { + if config.FlushInterval.Min > time.Minute || config.FlushInterval.Max > time.Minute { return fmt.Errorf("FlushIntervalRange cannot be higher than 60s, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) } - if config.FlushInterval.Min > config.FlushInterval.Max { + if !config.FlushInterval.IsOrdered() { return fmt.Errorf("FlushIntervalRange Min cannot be higher than Max, got Min: %v, Max: %v", config.FlushInterval.Min, config.FlushInterval.Max) } - if config.EarlyFlushPayloadSize > maxPayloadSize || config.EarlyFlushPayloadSize <= 0 { + if !authorizedPayloadSize.Contains(config.EarlyFlushPayloadSize) { return fmt.Errorf("EarlyFlushPayloadSize must be between 0 and 5MB, got %v", config.EarlyFlushPayloadSize) } @@ -159,24 +157,27 @@ func defaultConfig(config ClientConfig) ClientConfig { if config.APIKey == "" { config.APIKey = os.Getenv("DD_API_KEY") + if config.APIKey == "" { + config.APIKey = os.Getenv("DD-API-KEY") + } } if config.HeartbeatInterval == 0 { - config.HeartbeatInterval = time.Duration(globalinternal.IntEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", defaultHeartbeatInterval)) * time.Second + config.HeartbeatInterval = globalinternal.DurationEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", defaultHeartbeatInterval) } else { - config.HeartbeatInterval = clamp(config.HeartbeatInterval, time.Microsecond, 60*time.Second) + config.HeartbeatInterval = defaultAuthorizedHearbeatRange.Clamp(config.HeartbeatInterval) } if config.FlushInterval.Min == 0 { - config.FlushInterval.Min = defaultMinFlushInterval + config.FlushInterval.Min = defaultFlushIntervalRange.Min } else { - config.FlushInterval.Min = clamp(config.FlushInterval.Min, time.Microsecond, 60*time.Second) + config.FlushInterval.Min = defaultAuthorizedHearbeatRange.Clamp(config.FlushInterval.Min) } if config.FlushInterval.Max == 0 { - config.FlushInterval.Max = defaultMaxFlushInterval + config.FlushInterval.Max = defaultFlushIntervalRange.Max } else { - config.FlushInterval.Max = clamp(config.FlushInterval.Max, time.Microsecond, 60*time.Second) + config.FlushInterval.Max = defaultAuthorizedHearbeatRange.Clamp(config.FlushInterval.Max) } if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { @@ -191,8 +192,8 @@ func defaultConfig(config ClientConfig) ClientConfig { config.LogsEnabled = globalinternal.BoolEnv("DD_TELEMETRY_LOG_COLLECTION_ENABLED", true) } - if !config.InternalMetricsEnabled { - config.InternalMetricsEnabled = true + if !config.internalMetricsEnabled { + config.internalMetricsEnabled = true } if config.EarlyFlushPayloadSize == 0 { diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 2b0a25c51e..e3b01d4598 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -962,8 +962,8 @@ func TestClientFlush(t *testing.T) { config := defaultConfig(test.clientConfig) config.AgentURL = "http://localhost:8126" config.DependencyLoader = test.clientConfig.DependencyLoader // Don't use the default dependency loader - config.InternalMetricsEnabled = 
test.clientConfig.InternalMetricsEnabled // only enabled internal metrics when explicitly set - config.InternalMetricsEnabled = false + config.internalMetricsEnabled = test.clientConfig.internalMetricsEnabled // only enabled internal metrics when explicitly set + config.internalMetricsEnabled = false c, err := newClient(tracerConfig, config) require.NoError(t, err) defer c.Close() @@ -1286,7 +1286,7 @@ func BenchmarkWorstCaseScenarioFloodLogging(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, + FlushInterval: internal.Range[time.Duration]{Min: time.Second, Max: time.Second}, AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent @@ -1453,8 +1453,8 @@ func BenchmarkWorstCaseScenarioFloodMetrics(b *testing.B) { clientConfig := ClientConfig{ HeartbeatInterval: time.Hour, ExtendedHeartbeatInterval: time.Hour, - FlushInterval: Range[time.Duration]{Min: time.Second, Max: time.Second}, - DistributionsSize: Range[int]{256, -1}, + FlushInterval: internal.Range[time.Duration]{Min: time.Second, Max: time.Second}, + DistributionsSize: internal.Range[int]{Min: 256, Max: -1}, AgentURL: "http://localhost:8126", // Empty transport to avoid sending data to the agent diff --git a/internal/newtelemetry/configuration.go b/internal/newtelemetry/configuration.go index c51c0100bb..3cdbe76477 100644 --- a/internal/newtelemetry/configuration.go +++ b/internal/newtelemetry/configuration.go @@ -58,6 +58,7 @@ func (c *configuration) Payload() transport.Payload { c.seqID++ delete(c.config, conf.Name) } + return transport.AppClientConfigurationChange{ Configuration: configs, } diff --git a/internal/newtelemetry/dependencies.go b/internal/newtelemetry/dependencies.go index b23d746083..10e2bee3d3 100644 --- a/internal/newtelemetry/dependencies.go +++ b/internal/newtelemetry/dependencies.go @@ -24,23 +24,17 @@ type dependencies struct { } func (d *dependencies) Payload() transport.Payload { - if d.DependencyLoader == nil { - return nil - } - - d.mu.Lock() - defer d.mu.Unlock() - d.once.Do(func() { deps := d.loadDeps() // Requirement described here: // https://github.com/DataDog/instrumentation-telemetry-api-docs/blob/main/GeneratedDocumentation/ApiDocs/v2/producing-telemetry.md#app-dependencies-loaded - if len(deps) > 2000 { + const maxPerPayload = 2000 + if len(deps) > maxPerPayload { log.Debug("telemetry: too many (%d) dependencies to send, sending over multiple bodies", len(deps)) } - for i := 0; i < len(deps); i += 2000 { - end := min(i+2000, len(deps)) + for i := 0; i < len(deps); i += maxPerPayload { + end := min(i+maxPerPayload, len(deps)) d.payloads = append(d.payloads, transport.AppDependenciesLoaded{ Dependencies: deps[i:end], @@ -48,10 +42,14 @@ func (d *dependencies) Payload() transport.Payload { } }) + d.mu.Lock() + defer d.mu.Unlock() + if len(d.payloads) == 0 { return nil } + // return payloads one by one payloadZero := d.payloads[0] if len(d.payloads) == 1 { d.payloads = nil @@ -64,7 +62,12 @@ func (d *dependencies) Payload() transport.Payload { return payloadZero } +// loadDeps returns the dependencies from the DependencyLoader, formatted for telemetry intake. 
func (d *dependencies) loadDeps() []transport.Dependency { + if d.DependencyLoader == nil { + return nil + } + deps, ok := d.DependencyLoader() if !ok { log.Debug("telemetry: could not read build info, no dependencies will be reported") diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go index da4b4a96d7..d7712902a8 100644 --- a/internal/newtelemetry/distributions.go +++ b/internal/newtelemetry/distributions.go @@ -15,10 +15,10 @@ import ( ) type distributions struct { - store internal.TypedSyncMap[metricKey, *distribution] + store internal.SyncMap[metricKey, *distribution] pool *internal.SyncPool[[]float64] + queueSize internal.Range[int] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics - queueSize Range[int] } // LoadOrStore returns a MetricHandle for the given distribution metric. If the metric key does not exist, it will be created. @@ -27,7 +27,7 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []str key := newMetricKey(namespace, kind, name, tags) handle, loaded := d.store.LoadOrStore(key, &distribution{ key: key, - values: internal.NewRingQueueWithPool[float64](d.queueSize.Min, d.queueSize.Max, d.pool), + values: internal.NewRingQueueWithPool[float64](d.queueSize, d.pool), }) if !loaded { // The metric is new: validate and log issues about it if err := validateMetricKey(namespace, kind, name, tags); err != nil { @@ -57,13 +57,13 @@ func (d *distributions) Payload() transport.Payload { type distribution struct { key metricKey values *internal.RingQueue[float64] -} -var distrLogLossOnce sync.Once + logLoss sync.Once +} func (d *distribution) Submit(value float64) { if !d.values.Enqueue(value) { - distrLogLossOnce.Do(func() { + d.logLoss.Do(func() { log.Debug("telemetry: distribution %q is losing values because the buffer is full", d.key.name) }) } diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index 2599ab4c5f..c44b1d46d6 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -22,7 +22,7 @@ var ( globalClientRecorder = internal.NewRecorder[Client]() // metricsHandleSwappablePointers contains all the swappableMetricHandle, used to replay actions done before the actual MetricHandle is set - metricsHandleSwappablePointers internal.TypedSyncMap[metricKey, *swappableMetricHandle] + metricsHandleSwappablePointers internal.SyncMap[metricKey, *swappableMetricHandle] ) // GlobalClient returns the global telemetry client. @@ -37,19 +37,20 @@ func GlobalClient() Client { // StartApp starts the telemetry client with the given client send the app-started telemetry and sets it as the global (*client) // then calls client.Flush on the client asynchronously. func StartApp(client Client) { - if Disabled() || GlobalClient() != nil { + if Disabled() { return } - SwapClient(client) + if GlobalClient() != nil { + log.Debug("telemetry: StartApp called multiple times, ignoring") + return + } - globalClientRecorder.Replay(client) + SwapClient(client) client.AppStart() - go func() { - client.Flush() - }() + go client.Flush() } // SwapClient swaps the global client with the given client and Flush the old (*client). 
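The globalClientRecorder used here implements a record-then-replay scheme: telemetry calls made before any client exists are captured as closures and later run, in order, against the client that gets swapped in. A standalone sketch of that pattern using the internal Recorder with a plain strings.Builder (illustrative only, not part of this patch):

package internal

import (
	"strings"
	"testing"
)

func TestRecorderSketch(t *testing.T) {
	rec := NewRecorder[*strings.Builder]()

	// Nothing to act on yet: the closures are only queued.
	rec.Record(func(b *strings.Builder) { b.WriteString("app-started ") })
	rec.Record(func(b *strings.Builder) { b.WriteString("first-metric") })

	// Once the real target exists, every recorded call runs in FIFO order.
	var sb strings.Builder
	rec.Replay(&sb)

	if sb.String() != "app-started first-metric" {
		t.Fatalf("unexpected replay result: %q", sb.String())
	}
}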
@@ -63,6 +64,7 @@ func SwapClient(client Client) { } if client != nil { + globalClientRecorder.Replay(client) // Swap all metrics hot pointers to the new MetricHandle metricsHandleSwappablePointers.Range(func(_ metricKey, value *swappableMetricHandle) bool { value.swap(value.maker(client)) @@ -89,10 +91,6 @@ func MockClient(client Client) func() { // StopApp creates the app-stopped telemetry, adding to the queue and Flush all the queue before stopping the (*client). func StopApp() { - if Disabled() || GlobalClient() == nil { - return - } - if client := globalClient.Swap(nil); client != nil && *client != nil { (*client).AppStop() (*client).Flush() @@ -100,38 +98,40 @@ func StopApp() { } } -var telemetryClientEnabled = globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) +var telemetryClientDisabled = !globalinternal.BoolEnv("DD_INSTRUMENTATION_TELEMETRY_ENABLED", true) // Disabled returns whether instrumentation telemetry is disabled // according to the DD_INSTRUMENTATION_TELEMETRY_ENABLED env var func Disabled() bool { - return !telemetryClientEnabled + return telemetryClientDisabled } // Count creates a new metric handle for the given parameters that can be used to submit values. -// Count will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. -// The MetricHandle is then swapped with the actual MetricHandle once the client is started. +// Count will always return a [MetricHandle], even if telemetry is disabled or the client has yet to start. +// The [MetricHandle] is then swapped with the actual [MetricHandle] once the client is started. func Count(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(namespace, transport.CountMetric, name, tags) } // Rate creates a new metric handle for the given parameters that can be used to submit values. -// Rate will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. -// The MetricHandle is then swapped with the actual MetricHandle once the client is started. +// Rate will always return a [MetricHandle], even if telemetry is disabled or the client has yet to start. +// The [MetricHandle] is then swapped with the actual [MetricHandle] once the client is started. func Rate(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(namespace, transport.RateMetric, name, tags) } // Gauge creates a new metric handle for the given parameters that can be used to submit values. -// Gauge will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. -// The MetricHandle is then swapped with the actual MetricHandle once the client is started. +// Gauge will always return a [MetricHandle], even if telemetry is disabled or the client has yet to start. +// The [MetricHandle] is then swapped with the actual [MetricHandle] once the client is started. func Gauge(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(namespace, transport.GaugeMetric, name, tags) } // Distribution creates a new metric handle for the given parameters that can be used to submit values. -// Distribution will always return a MetricHandle, even if telemetry is disabled or the client has yet to start. -// The MetricHandle is then swapped with the actual MetricHandle once the client is started. +// Distribution will always return a [MetricHandle], even if telemetry is disabled or the client has yet to start. 
+// The [MetricHandle] is then swapped with the actual [MetricHandle] once the client is started. +// The Get() method of the [MetricHandle] will return the last value submitted. +// Distribution MetricHandle is advised to be held in a variable more than the rest of the metric types to avoid too many useless allocations. func Distribution(namespace Namespace, name string, tags []string) MetricHandle { return globalClientNewMetric(namespace, transport.DistMetric, name, tags) } diff --git a/internal/newtelemetry/integration.go b/internal/newtelemetry/integration.go index c62fa6ec1d..d77ef116cd 100644 --- a/internal/newtelemetry/integration.go +++ b/internal/newtelemetry/integration.go @@ -13,13 +13,18 @@ import ( type integrations struct { mu sync.Mutex - integrations []Integration + integrations []transport.Integration } func (i *integrations) Add(integration Integration) { i.mu.Lock() defer i.mu.Unlock() - i.integrations = append(i.integrations, integration) + i.integrations = append(i.integrations, transport.Integration{ + Name: integration.Name, + Version: integration.Version, + Enabled: integration.Error == "", // no error means the integration was enabled successfully + Error: integration.Error, + }) } func (i *integrations) Payload() transport.Payload { @@ -28,16 +33,7 @@ func (i *integrations) Payload() transport.Payload { if len(i.integrations) == 0 { return nil } - - integrations := make([]transport.Integration, len(i.integrations)) - for idx, integration := range i.integrations { - integrations[idx] = transport.Integration{ - Name: integration.Name, - Version: integration.Version, - Enabled: integration.Error == "", // no error means the integration was enabled successfully - Error: integration.Error, - } - } + integrations := i.integrations i.integrations = nil return transport.AppIntegrationChange{ Integrations: integrations, diff --git a/internal/newtelemetry/internal/range.go b/internal/newtelemetry/internal/range.go new file mode 100644 index 0000000000..c084dc51de --- /dev/null +++ b/internal/newtelemetry/internal/range.go @@ -0,0 +1,31 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package internal + +import ( + "cmp" +) + +// Range is a type that represents a range of values. +type Range[T cmp.Ordered] struct { + Min T + Max T +} + +// IsOrdered checks if the range is ordered. e.g. Min <= Max. +func (r Range[T]) IsOrdered() bool { + return r.Min <= r.Max +} + +// Contains checks if a value is within the range. +func (r Range[T]) Contains(value T) bool { + return value >= r.Min && value <= r.Max +} + +// Clamp squeezes a value between a minimum and maximum value. +func (r Range[T]) Clamp(value T) T { + return max(min(r.Max, value), r.Min) +} diff --git a/internal/newtelemetry/internal/recorder.go b/internal/newtelemetry/internal/recorder.go index d993ae9758..69c6fdd7f4 100644 --- a/internal/newtelemetry/internal/recorder.go +++ b/internal/newtelemetry/internal/recorder.go @@ -12,15 +12,15 @@ type Recorder[T any] struct { } // TODO: tweak these values once we get telemetry data from the telemetry client -const ( - minQueueCap = 16 // Initial queue capacity - maxQueueCap = 512 // Maximum queue capacity -) +var queueCap = Range[int]{ + Min: 16, // Initial queue capacity + Max: 512, // Maximum queue capacity +} // NewRecorder creates a new [Recorder] instance. 
with 512 as the maximum number of recorded functions before overflowing. func NewRecorder[T any]() Recorder[T] { return Recorder[T]{ - queue: NewRingQueue[func(T)](minQueueCap, maxQueueCap), + queue: NewRingQueue[func(T)](queueCap), } } @@ -49,5 +49,5 @@ func (r Recorder[T]) Replay(t T) { // Clear clears the Recorder's queue. func (r Recorder[T]) Clear() { - r.queue.ReleaseBuffer(r.queue.GetBuffer()) + r.queue.Clear() } diff --git a/internal/newtelemetry/internal/ringbuffer.go b/internal/newtelemetry/internal/ringbuffer.go index 02cefac403..aeccfb3e06 100644 --- a/internal/newtelemetry/internal/ringbuffer.go +++ b/internal/newtelemetry/internal/ringbuffer.go @@ -16,35 +16,40 @@ type RingQueue[T any] struct { // mu is the lock for the buffer, head and tail. mu sync.Mutex // pool is the pool of buffers. Normally there should only be one or 2 buffers in the pool. - pool *SyncPool[[]T] - maxBufferSize int + pool *SyncPool[[]T] + // BufferSizes is the range of buffer sizes that the ring queue can have. + BufferSizes Range[int] } // NewRingQueue creates a new RingQueue with a minimum size and a maximum size. -func NewRingQueue[T any](minSize, maxSize int) *RingQueue[T] { +func NewRingQueue[T any](rang Range[int]) *RingQueue[T] { return &RingQueue[T]{ - buffer: make([]T, minSize), - maxBufferSize: maxSize, - pool: NewSyncPool[[]T](func() []T { return make([]T, minSize) }), + buffer: make([]T, rang.Min), + pool: NewSyncPool[[]T](func() []T { return make([]T, rang.Min) }), + BufferSizes: rang, } } // NewRingQueueWithPool creates a new RingQueue with a minimum size, a maximum size and a pool. Make sure the pool is properly initialized with the right type -func NewRingQueueWithPool[T any](minSize, maxSize int, pool *SyncPool[[]T]) *RingQueue[T] { +func NewRingQueueWithPool[T any](rang Range[int], pool *SyncPool[[]T]) *RingQueue[T] { return &RingQueue[T]{ - buffer: make([]T, minSize), - maxBufferSize: maxSize, - pool: pool, + buffer: make([]T, rang.Min), + pool: pool, + BufferSizes: rang, } } // Length returns the number of elements currently stored in the queue. func (rq *RingQueue[T]) Length() int { + rq.mu.Lock() + defer rq.mu.Unlock() + return rq.count } func (rq *RingQueue[T]) resizeLocked() { - newBuf := make([]T, min(rq.count*2, rq.maxBufferSize)) + newBuf := make([]T, rq.BufferSizes.Clamp(rq.count*2)) + defer rq.releaseBuffer(rq.buffer) if rq.tail > rq.head { copy(newBuf, rq.buffer[rq.head:rq.tail]) @@ -61,7 +66,7 @@ func (rq *RingQueue[T]) resizeLocked() { func (rq *RingQueue[T]) enqueueLocked(elem T) bool { spaceLeft := true if rq.count == len(rq.buffer) { - if len(rq.buffer) == rq.maxBufferSize { + if len(rq.buffer) == rq.BufferSizes.Max { spaceLeft = false // bitwise modulus rq.head = (rq.head + 1) % len(rq.buffer) @@ -81,20 +86,21 @@ func (rq *RingQueue[T]) enqueueLocked(elem T) bool { func (rq *RingQueue[T]) ReversePeek() T { rq.mu.Lock() defer rq.mu.Unlock() + if rq.count == 0 { + var zero T + return zero + } return rq.buffer[(rq.tail-1+len(rq.buffer))%len(rq.buffer)] } -// Peek returns the first element that was enqueued without removing it. -func (rq *RingQueue[T]) Peek() T { - rq.mu.Lock() - defer rq.mu.Unlock() - return rq.buffer[rq.head] -} - -// Enqueue adds one or multiple values to the buffer. returns false if the buffer is full. +// Enqueue adds one or multiple values to the buffer. 
Returns false if at least one item had to be pulled out from the queue to make space for new ones func (rq *RingQueue[T]) Enqueue(vals ...T) bool { rq.mu.Lock() defer rq.mu.Unlock() + if len(vals) == 0 { + return rq.IsFull() + } + spaceLeft := true for _, val := range vals { spaceLeft = rq.enqueueLocked(val) @@ -119,8 +125,8 @@ func (rq *RingQueue[T]) Dequeue() T { return ret } -// GetBuffer returns the current buffer and resets it. -func (rq *RingQueue[T]) GetBuffer() []T { +// getBuffer returns the current buffer and resets it. +func (rq *RingQueue[T]) getBuffer() []T { rq.mu.Lock() defer rq.mu.Unlock() return rq.getBufferLocked() @@ -140,7 +146,7 @@ func (rq *RingQueue[T]) Flush() []T { buf := rq.getBufferLocked() rq.mu.Unlock() - defer rq.ReleaseBuffer(buf) + defer rq.releaseBuffer(buf) copyBuf := make([]T, count) for i := 0; i < count; i++ { @@ -150,8 +156,8 @@ func (rq *RingQueue[T]) Flush() []T { return copyBuf } -// ReleaseBuffer returns the buffer to the pool. -func (rq *RingQueue[T]) ReleaseBuffer(buf []T) { +// releaseBuffer returns the buffer to the pool. +func (rq *RingQueue[T]) releaseBuffer(buf []T) { var zero T buf = buf[:cap(buf)] // Make sure nobody reduced the length of the buffer for i := range buf { @@ -162,10 +168,7 @@ func (rq *RingQueue[T]) ReleaseBuffer(buf []T) { // IsEmpty returns true if the buffer is empty. func (rq *RingQueue[T]) IsEmpty() bool { - rq.mu.Lock() - defer rq.mu.Unlock() - - return rq.count == 0 + return rq.Length() == 0 } // IsFull returns true if the buffer is full and cannot accept more elements. @@ -173,5 +176,16 @@ func (rq *RingQueue[T]) IsFull() bool { rq.mu.Lock() defer rq.mu.Unlock() - return len(rq.buffer) == rq.count && len(rq.buffer) == rq.maxBufferSize + return len(rq.buffer) == rq.count && len(rq.buffer) == rq.BufferSizes.Max +} + +// Clear removes all elements from the buffer. 
+func (rq *RingQueue[T]) Clear() { + rq.mu.Lock() + defer rq.mu.Unlock() + rq.head, rq.tail, rq.count = 0, 0, 0 + var zero T + for i := range rq.buffer { + rq.buffer[i] = zero + } } diff --git a/internal/newtelemetry/internal/ringbuffer_test.go b/internal/newtelemetry/internal/ringbuffer_test.go index 8e07d36fca..9b98eebc10 100644 --- a/internal/newtelemetry/internal/ringbuffer_test.go +++ b/internal/newtelemetry/internal/ringbuffer_test.go @@ -12,35 +12,35 @@ import ( ) func TestEnqueueSingleElement(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) success := queue.Enqueue(1) assert.True(t, success) assert.Equal(t, 1, queue.ReversePeek()) } func TestEnqueueMultipleElements(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) success := queue.Enqueue(1, 2, 3) assert.True(t, success) assert.Equal(t, 3, queue.ReversePeek()) } func TestEnqueueBeyondCapacity(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) success := queue.Enqueue(1, 2, 3, 4, 5, 6) assert.False(t, success) assert.Equal(t, 6, queue.ReversePeek()) } func TestDequeueSingleElement(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1) val := queue.Dequeue() assert.Equal(t, 1, val) } func TestDequeueMultipleElements(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1, 2, 3) val1 := queue.Dequeue() val2 := queue.Dequeue() @@ -48,22 +48,32 @@ func TestDequeueMultipleElements(t *testing.T) { assert.Equal(t, 2, val2) } +func TestDequeueFromEmptyQueueAfterBeingFull(t *testing.T) { + queue := NewRingQueue[int](Range[int]{1, 1}) + assert.True(t, queue.Enqueue(1)) + assert.False(t, queue.Enqueue(2)) + assert.Equal(t, []int{2}, queue.Flush()) + + // Should return the zero value for int + assert.Equal(t, 0, queue.Dequeue()) +} + func TestDequeueFromEmptyQueue(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) val := queue.Dequeue() assert.Equal(t, 0, val) // Assuming zero value for int } func TestGetBufferAndReset(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1, 2, 3) - buffer := queue.GetBuffer() + buffer := queue.getBuffer() assert.Equal(t, []int{1, 2, 3}, buffer) assert.True(t, queue.IsEmpty()) } func TestFlushAndReset(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1, 2, 3) buffer := queue.Flush() assert.Equal(t, []int{1, 2, 3}, buffer) @@ -71,30 +81,30 @@ func TestFlushAndReset(t *testing.T) { } func TestIsEmptyWhenQueueIsEmpty(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) assert.True(t, queue.IsEmpty()) } func TestIsEmptyWhenQueueIsNotEmpty(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1) assert.False(t, queue.IsEmpty()) } func TestIsFullWhenQueueIsFull(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1, 2, 3, 4, 5) assert.True(t, queue.IsFull()) } func TestIsFullWhenQueueIsNotFull(t *testing.T) { - queue := NewRingQueue[int](3, 5) + queue := NewRingQueue[int](Range[int]{3, 5}) queue.Enqueue(1, 2, 3) assert.False(t, queue.IsFull()) } func TestEnqueueToFullQueue(t *testing.T) { - queue := NewRingQueue[int](3, 3) + queue := 
NewRingQueue[int](Range[int]{3, 3}) success := queue.Enqueue(1, 2, 3) assert.True(t, success) success = queue.Enqueue(4) @@ -103,7 +113,7 @@ func TestEnqueueToFullQueue(t *testing.T) { } func TestDequeueFromFullQueue(t *testing.T) { - queue := NewRingQueue[int](3, 3) + queue := NewRingQueue[int](Range[int]{3, 3}) queue.Enqueue(1, 2, 3) val := queue.Dequeue() assert.Equal(t, 1, val) @@ -111,7 +121,7 @@ func TestDequeueFromFullQueue(t *testing.T) { } func TestEnqueueAfterDequeue(t *testing.T) { - queue := NewRingQueue[int](3, 3) + queue := NewRingQueue[int](Range[int]{3, 3}) queue.Enqueue(1, 2, 3) queue.Dequeue() success := queue.Enqueue(4) @@ -120,7 +130,7 @@ func TestEnqueueAfterDequeue(t *testing.T) { } func TestDequeueAllElements(t *testing.T) { - queue := NewRingQueue[int](3, 3) + queue := NewRingQueue[int](Range[int]{3, 3}) queue.Enqueue(1, 2, 3) val1 := queue.Dequeue() val2 := queue.Dequeue() @@ -132,7 +142,7 @@ func TestDequeueAllElements(t *testing.T) { } func TestEnqueueAndDequeueInterleaved(t *testing.T) { - queue := NewRingQueue[int](3, 3) + queue := NewRingQueue[int](Range[int]{3, 3}) queue.Enqueue(1) val1 := queue.Dequeue() queue.Enqueue(2) @@ -146,7 +156,7 @@ func TestEnqueueAndDequeueInterleaved(t *testing.T) { } func TestEnqueueWithResize(t *testing.T) { - queue := NewRingQueue[int](2, 4) + queue := NewRingQueue[int](Range[int]{2, 4}) queue.Enqueue(1, 2) assert.Equal(t, 2, len(queue.buffer)) queue.Enqueue(3) diff --git a/internal/newtelemetry/internal/typedsyncmap.go b/internal/newtelemetry/internal/syncmap.go similarity index 56% rename from internal/newtelemetry/internal/typedsyncmap.go rename to internal/newtelemetry/internal/syncmap.go index 14f9c97682..24482917f3 100644 --- a/internal/newtelemetry/internal/typedsyncmap.go +++ b/internal/newtelemetry/internal/syncmap.go @@ -9,11 +9,11 @@ import ( "sync" ) -type TypedSyncMap[K comparable, V any] struct { +type SyncMap[K comparable, V any] struct { sync.Map } -func (m *TypedSyncMap[K, V]) Load(key K) (value V, ok bool) { +func (m *SyncMap[K, V]) Load(key K) (value V, ok bool) { v, ok := m.Map.Load(key) if !ok { return @@ -22,32 +22,31 @@ func (m *TypedSyncMap[K, V]) Load(key K) (value V, ok bool) { return } -func (m *TypedSyncMap[K, V]) Range(f func(key K, value V) bool) { +func (m *SyncMap[K, V]) Range(f func(key K, value V) bool) { m.Map.Range(func(key, value interface{}) bool { return f(key.(K), value.(V)) }) } -func (m *TypedSyncMap[K, V]) Len() int { +func (m *SyncMap[K, V]) Len() int { count := 0 - m.Map.Range(func(_, _ interface{}) bool { + m.Map.Range(func(_, _ any) bool { count++ return true }) return count } -func (m *TypedSyncMap[K, V]) LoadOrStore(key K, value V) (actual V, loaded bool) { +func (m *SyncMap[K, V]) LoadOrStore(key K, value V) (actual V, loaded bool) { v, loaded := m.Map.LoadOrStore(key, value) - actual = v.(V) - return + return v.(V), loaded } -func (m *TypedSyncMap[K, V]) LoadAndDelete(key K) (value V, loaded bool) { +func (m *SyncMap[K, V]) LoadAndDelete(key K) (value V, loaded bool) { v, loaded := m.Map.LoadAndDelete(key) if !loaded { - return + var zero V + return zero, loaded } - value = v.(V) - return + return v.(V), true } diff --git a/internal/newtelemetry/internal/typedsyncpool.go b/internal/newtelemetry/internal/syncpool.go similarity index 81% rename from internal/newtelemetry/internal/typedsyncpool.go rename to internal/newtelemetry/internal/syncpool.go index 69b45c5f7c..a4379ccdc4 100644 --- a/internal/newtelemetry/internal/typedsyncpool.go +++ 
b/internal/newtelemetry/internal/syncpool.go @@ -9,12 +9,12 @@ import ( "sync" ) -// SyncPool is a wrapper around sync.Pool that provides type safety. +// SyncPool is a wrapper around [sync.Pool] that provides type safety. type SyncPool[T any] struct { pool *sync.Pool } -// NewSyncPool creates a new SyncPool with the given new function. +// NewSyncPool creates a new Pool with the given new function. func NewSyncPool[T any](new func() T) *SyncPool[T] { return &SyncPool[T]{ pool: &sync.Pool{ diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index 6b5011c06e..2ccfaa9199 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -13,30 +13,26 @@ import ( type TickFunc func() type Ticker struct { - *time.Ticker + ticker *time.Ticker tickSpeedMu sync.Mutex tickSpeed time.Duration - maxInterval time.Duration - minInterval time.Duration + interval Range[time.Duration] tickFunc TickFunc } -func NewTicker(tickFunc TickFunc, minInterval, maxInterval time.Duration) *Ticker { +func NewTicker(tickFunc TickFunc, interval Range[time.Duration]) *Ticker { ticker := &Ticker{ - Ticker: time.NewTicker(maxInterval), - tickSpeed: maxInterval, - - maxInterval: maxInterval, - minInterval: minInterval, - - tickFunc: tickFunc, + ticker: time.NewTicker(interval.Max), + tickSpeed: interval.Max, + interval: interval, + tickFunc: tickFunc, } go func() { - for range ticker.C { + for range ticker.ticker.C { tickFunc() } }() @@ -48,14 +44,30 @@ func (t *Ticker) CanIncreaseSpeed() { t.tickSpeedMu.Lock() defer t.tickSpeedMu.Unlock() - t.tickSpeed = max(t.tickSpeed/2, t.minInterval) - t.Reset(t.tickSpeed) + oldTickSpeed := t.tickSpeed + t.tickSpeed = t.interval.Clamp(t.tickSpeed / 2) + + if oldTickSpeed == t.tickSpeed { + return + } + + t.ticker.Reset(t.tickSpeed) } func (t *Ticker) CanDecreaseSpeed() { t.tickSpeedMu.Lock() defer t.tickSpeedMu.Unlock() - t.tickSpeed = min(t.tickSpeed*2, t.maxInterval) - t.Reset(t.tickSpeed) + oldTickSpeed := t.tickSpeed + t.tickSpeed = t.interval.Clamp(t.tickSpeed * 2) + + if oldTickSpeed == t.tickSpeed { + return + } + + t.ticker.Reset(t.tickSpeed) +} + +func (t *Ticker) Stop() { + t.ticker.Stop() } diff --git a/internal/newtelemetry/internal/writer.go b/internal/newtelemetry/internal/writer.go index 36722bb4fd..b26c476748 100644 --- a/internal/newtelemetry/internal/writer.go +++ b/internal/newtelemetry/internal/writer.go @@ -21,6 +21,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/globalconfig" "gopkg.in/DataDog/dd-trace-go.v1/internal/hostname" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal/transport" "gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo" "gopkg.in/DataDog/dd-trace-go.v1/internal/version" @@ -122,6 +123,10 @@ type WriterConfig struct { } func NewWriter(config WriterConfig) (Writer, error) { + if len(config.Endpoints) == 0 { + return nil, fmt.Errorf("telemetry/writer: no endpoints provided") + } + if config.HTTPClient == nil { config.HTTPClient = defaultHTTPClient } @@ -132,10 +137,7 @@ func NewWriter(config WriterConfig) (Writer, error) { copyClient := *config.HTTPClient config.HTTPClient = ©Client config.HTTPClient.Timeout = 5 * time.Second - } - - if len(config.Endpoints) == 0 { - return nil, fmt.Errorf("telemetry/writer: no endpoints provided") + log.Debug("telemetry/writer: client timeout was higher than 5 seconds, clamping it to 5 seconds") } body := 
newBody(config.TracerConfig, config.Debug) @@ -191,17 +193,16 @@ func preBakeRequest(body *transport.Body, endpoint *http.Request) *http.Request // newRequest creates a new http.Request with the given payload and the necessary headers. func (w *writer) newRequest(endpoint *http.Request, payload transport.Payload) *http.Request { - pipeReader, pipeWriter := io.Pipe() - request := endpoint.Clone(context.Background()) - w.body.SeqID++ w.body.TracerTime = time.Now().Unix() w.body.RequestType = payload.RequestType() w.body.Payload = payload - request.Body = pipeReader + request := endpoint.Clone(context.Background()) request.Header.Set("DD-Telemetry-Request-Type", string(payload.RequestType())) + pipeReader, pipeWriter := io.Pipe() + request.Body = pipeReader go func() { // No need to wait on this because the http client will close the pipeReader which will close the pipeWriter and finish the goroutine pipeWriter.CloseWithError(json.NewEncoder(pipeWriter).Encode(w.body)) diff --git a/internal/newtelemetry/log/log.go b/internal/newtelemetry/log/log.go index 332004bf73..eabe43e9e1 100644 --- a/internal/newtelemetry/log/log.go +++ b/internal/newtelemetry/log/log.go @@ -11,7 +11,7 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry" ) -func divideArgs(args ...any) ([]newtelemetry.LogOption, []any) { +func divideArgs(args []any) ([]newtelemetry.LogOption, []any) { if len(args) == 0 { return nil, nil } @@ -30,18 +30,20 @@ func divideArgs(args ...any) ([]newtelemetry.LogOption, []any) { // Debug sends a telemetry payload with a debug log message to the backend. func Debug(format string, args ...any) { - options, fmtArgs := divideArgs(args...) - newtelemetry.Log(newtelemetry.LogDebug, fmt.Sprintf(format, fmtArgs...), options...) + log(newtelemetry.LogDebug, format, args) } // Warn sends a telemetry payload with a warning log message to the backend. func Warn(format string, args ...any) { - options, fmtArgs := divideArgs(args...) - newtelemetry.Log(newtelemetry.LogWarn, fmt.Sprintf(format, fmtArgs...), options...) + log(newtelemetry.LogWarn, format, args) } // Error sends a telemetry payload with an error log message to the backend. func Error(format string, args ...any) { - options, fmtArgs := divideArgs(args...) - newtelemetry.Log(newtelemetry.LogError, fmt.Sprintf(format, fmtArgs...), options...) + log(newtelemetry.LogError, format, args) +} + +func log(lvl newtelemetry.LogLevel, format string, args []any) { + opts, fmtArgs := divideArgs(args) + newtelemetry.Log(lvl, fmt.Sprintf(format, fmtArgs...), opts...) } diff --git a/internal/newtelemetry/logger.go b/internal/newtelemetry/logger.go index f65b9d1a1a..b9a424d7d4 100644 --- a/internal/newtelemetry/logger.go +++ b/internal/newtelemetry/logger.go @@ -31,7 +31,7 @@ func WithTags(tags []string) LogOption { // that is generated inside the WithStacktrace function. Logs demultiplication does not take the stacktrace into account. // This means that a log that has been demultiplicated will only show of the first log. 
func WithStacktrace() LogOption { - buf := make([]byte, 1<<12) + buf := make([]byte, 4_096) buf = buf[:runtime.Stack(buf, false)] return func(_ *loggerKey, value *loggerValue) { if value == nil { @@ -41,14 +41,6 @@ func WithStacktrace() LogOption { } } -type LogLevel = transport.LogLevel - -const ( - LogDebug = transport.LogLevelDebug - LogWarn = transport.LogLevelWarn - LogError = transport.LogLevelError -) - type loggerKey struct { tags string message string @@ -62,7 +54,7 @@ type loggerValue struct { } type logger struct { - store internal.TypedSyncMap[loggerKey, *loggerValue] + store internal.SyncMap[loggerKey, *loggerValue] } func (logger *logger) Add(level LogLevel, text string, opts ...LogOption) { diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go index 433237c9fb..b9c57fd064 100644 --- a/internal/newtelemetry/metrics.go +++ b/internal/newtelemetry/metrics.go @@ -70,7 +70,7 @@ type metricHandle interface { } type metrics struct { - store internal.TypedSyncMap[metricKey, metricHandle] + store internal.SyncMap[metricKey, metricHandle] pool *internal.SyncPool[*metricPoint] skipAllowlist bool // Debugging feature to skip the allowlist of known metrics } From 3dd727bd000bd06af2a9d99e2dff58eab069c704 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Fri, 7 Feb 2025 17:25:19 +0100 Subject: [PATCH 54/61] fix issue where reducing the value of DD_TELEMETRY_HEARTBEAT_INTERVAL would not reduce the value of the flushing interval Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_config.go | 18 +++++++++++------- internal/newtelemetry/internal/range.go | 8 ++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index f4d6751c0a..c13ac0b7c8 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -83,7 +83,7 @@ var ( agentlessURL = "https://instrumentation-telemetry-intake.datadoghq.com/api/v2/apmtelemetry" // defaultHeartbeatInterval is the default interval at which the agent sends a heartbeat. - defaultHeartbeatInterval = time.Minute // seconds + defaultHeartbeatInterval = time.Minute // defaultExtendedHeartbeatInterval is the default interval at which the agent sends an extended heartbeat. 
defaultExtendedHeartbeatInterval = 24 * time.Hour @@ -162,12 +162,6 @@ func defaultConfig(config ClientConfig) ClientConfig { } } - if config.HeartbeatInterval == 0 { - config.HeartbeatInterval = globalinternal.DurationEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", defaultHeartbeatInterval) - } else { - config.HeartbeatInterval = defaultAuthorizedHearbeatRange.Clamp(config.HeartbeatInterval) - } - if config.FlushInterval.Min == 0 { config.FlushInterval.Min = defaultFlushIntervalRange.Min } else { @@ -180,6 +174,16 @@ func defaultConfig(config ClientConfig) ClientConfig { config.FlushInterval.Max = defaultAuthorizedHearbeatRange.Clamp(config.FlushInterval.Max) } + heartBeatInterval := defaultHeartbeatInterval + if config.HeartbeatInterval != 0 { + heartBeatInterval = config.HeartbeatInterval + } + + envVal := globalinternal.FloatEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", heartBeatInterval.Seconds()) + config.HeartbeatInterval = defaultAuthorizedHearbeatRange.Clamp(time.Duration(envVal * float64(time.Second))) + // Make sure we flush at least at each heartbeat interval + config.FlushInterval = config.FlushInterval.ReduceMax(config.HeartbeatInterval) + if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { config.DependencyLoader = debug.ReadBuildInfo } diff --git a/internal/newtelemetry/internal/range.go b/internal/newtelemetry/internal/range.go index c084dc51de..c2e1e1852f 100644 --- a/internal/newtelemetry/internal/range.go +++ b/internal/newtelemetry/internal/range.go @@ -29,3 +29,11 @@ func (r Range[T]) Contains(value T) bool { func (r Range[T]) Clamp(value T) T { return max(min(r.Max, value), r.Min) } + +// ReduceMax returns a new range where value is the new max and min is either the current min or the new value to make sure the range is ordered. 
+func (r Range[T]) ReduceMax(value T) Range[T] { + return Range[T]{ + Min: min(r.Min, value), + Max: value, + } +} From ba29163862c756a7b769fa5eaa6d33f8596c547d Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 10 Feb 2025 14:37:37 +0100 Subject: [PATCH 55/61] add debug logs and don't make env and version mandatory parameters Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 16 ++++------- internal/newtelemetry/client_test.go | 40 +++++++++++++++++++-------- internal/newtelemetry/globalclient.go | 5 +++- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index f10bce9812..1a02339aa6 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -23,14 +23,6 @@ func NewClient(service, env, version string, config ClientConfig) (Client, error return nil, errors.New("service name must not be empty") } - if env == "" { - return nil, errors.New("environment name must not be empty") - } - - if version == "" { - return nil, errors.New("version must not be empty") - } - config = defaultConfig(config) if err := config.validateConfig(); err != nil { return nil, err @@ -213,7 +205,12 @@ func (c *client) Flush() { } } - _, _ = c.flush(payloads) + nbBytes, err := c.flush(payloads) + if err != nil { + log.Warn("error while flushing telemetry data: %v", err) + } + + log.Debug("flushed %d bytes of telemetry data", nbBytes) } func (c *client) transform(payloads []transport.Payload) []transport.Payload { @@ -252,7 +249,6 @@ func (c *client) flush(payloads []transport.Payload) (int, error) { log.Warn("telemetry: tried sending a payload that was too large, dropping it") continue } - log.Warn("error while flushing telemetry data: %v", err) c.payloadQueue.Enqueue(payloads[i:]...) 
return nbBytes, err } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index e3b01d4598..4fc1434813 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -39,31 +39,49 @@ func TestNewClient(t *testing.T) { newErr string }{ { - name: "empty service", + name: "nominal", tracerConfig: internal.TracerConfig{ - Service: "", + Service: "test-service", Env: "test-env", Version: "1.0.0", }, - newErr: "service name must not be empty", + clientConfig: ClientConfig{ + AgentURL: "http://localhost:8126", + }, }, { - name: "empty environment", + name: "empty service", + tracerConfig: internal.TracerConfig{}, + newErr: "service name must not be empty", + }, + { + name: "empty agent url", tracerConfig: internal.TracerConfig{ Service: "test-service", - Env: "", - Version: "1.0.0", }, - newErr: "environment name must not be empty", + clientConfig: ClientConfig{}, + newErr: "could not build any endpoint", }, { - name: "empty version", + name: "invalid agent url", tracerConfig: internal.TracerConfig{ Service: "test-service", - Env: "test-env", - Version: "", }, - newErr: "version must not be empty", + clientConfig: ClientConfig{ + AgentURL: "toto_protocol://localhost:8126", + }, + newErr: "invalid agent URL", + }, + { + name: "Too big payload size", + tracerConfig: internal.TracerConfig{ + Service: "test-service", + }, + clientConfig: ClientConfig{ + AgentURL: "http://localhost:8126", + EarlyFlushPayloadSize: 64 * 1024 * 1024, // 64MB + }, + newErr: "EarlyFlushPayloadSize must be between 0 and 5MB", }, } { t.Run(test.name, func(t *testing.T) { diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go index c44b1d46d6..e661aaf189 100644 --- a/internal/newtelemetry/globalclient.go +++ b/internal/newtelemetry/globalclient.go @@ -50,7 +50,10 @@ func StartApp(client Client) { client.AppStart() - go client.Flush() + go func() { + client.Flush() + log.Debug("telemetry: sucessfully flushed the telemetry app-started payload") + }() } // SwapClient swaps the global client with the given client and Flush the old (*client). From e27abdaa28d98ce86e184dfffefab8f954600ba0 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 10 Feb 2025 15:11:00 +0100 Subject: [PATCH 56/61] fix lint error that showed up from nothing Signed-off-by: Eliott Bouhana --- internal/newtelemetry/internal/syncpool.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/newtelemetry/internal/syncpool.go b/internal/newtelemetry/internal/syncpool.go index a4379ccdc4..29ab2f9b1e 100644 --- a/internal/newtelemetry/internal/syncpool.go +++ b/internal/newtelemetry/internal/syncpool.go @@ -15,11 +15,11 @@ type SyncPool[T any] struct { } // NewSyncPool creates a new Pool with the given new function. 
-func NewSyncPool[T any](new func() T) *SyncPool[T] { +func NewSyncPool[T any](newT func() T) *SyncPool[T] { return &SyncPool[T]{ pool: &sync.Pool{ New: func() any { - return new() + return newT() }, }, } From c4440c32a5a83227b6942789b081fed4d19dd2c9 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 10 Feb 2025 15:35:41 +0100 Subject: [PATCH 57/61] log when heartbeat is at custom interval Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_config.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index c13ac0b7c8..98111775af 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -14,6 +14,7 @@ import ( "time" globalinternal "gopkg.in/DataDog/dd-trace-go.v1/internal" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry/internal" ) @@ -181,6 +182,9 @@ func defaultConfig(config ClientConfig) ClientConfig { envVal := globalinternal.FloatEnv("DD_TELEMETRY_HEARTBEAT_INTERVAL", heartBeatInterval.Seconds()) config.HeartbeatInterval = defaultAuthorizedHearbeatRange.Clamp(time.Duration(envVal * float64(time.Second))) + if config.HeartbeatInterval != defaultHeartbeatInterval { + log.Debug("telemetry: using custom heartbeat interval %v", config.HeartbeatInterval) + } // Make sure we flush at least at each heartbeat interval config.FlushInterval = config.FlushInterval.ReduceMax(config.HeartbeatInterval) From 300a2b924c2b4a5c4aebebb8b0ba212d662c0542 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 10 Feb 2025 16:12:01 +0100 Subject: [PATCH 58/61] stop using for range in ticker.C Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client.go | 6 ++++-- internal/newtelemetry/internal/ticker.go | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/internal/newtelemetry/client.go b/internal/newtelemetry/client.go index 1a02339aa6..a37d25629d 100644 --- a/internal/newtelemetry/client.go +++ b/internal/newtelemetry/client.go @@ -207,10 +207,12 @@ func (c *client) Flush() { nbBytes, err := c.flush(payloads) if err != nil { - log.Warn("error while flushing telemetry data: %v", err) + log.Warn("telemetry: error while flushing: %v", err) } - log.Debug("flushed %d bytes of telemetry data", nbBytes) + if c.clientConfig.Debug { + log.Debug("telemetry: flushed %d bytes of data", nbBytes) + } } func (c *client) transform(payloads []transport.Payload) []transport.Payload { diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index 2ccfaa9199..167dad5b37 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -8,6 +8,8 @@ package internal import ( "sync" "time" + + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" ) type TickFunc func() @@ -21,6 +23,8 @@ type Ticker struct { interval Range[time.Duration] tickFunc TickFunc + + stopChan chan struct{} } func NewTicker(tickFunc TickFunc, interval Range[time.Duration]) *Ticker { @@ -29,11 +33,17 @@ func NewTicker(tickFunc TickFunc, interval Range[time.Duration]) *Ticker { tickSpeed: interval.Max, interval: interval, tickFunc: tickFunc, + stopChan: make(chan struct{}), } go func() { - for range ticker.ticker.C { - tickFunc() + for { + select { + case <-ticker.ticker.C: + tickFunc() + case <-ticker.stopChan: + break + } } }() @@ -51,6 +61,7 @@ func (t *Ticker) CanIncreaseSpeed() { return } + log.Debug("telemetry: increasing flush speed to an interval 
of %s", t.tickSpeed) t.ticker.Reset(t.tickSpeed) } @@ -65,9 +76,12 @@ func (t *Ticker) CanDecreaseSpeed() { return } + log.Debug("telemetry: decreasing flush speed to an interval of %s", t.tickSpeed) t.ticker.Reset(t.tickSpeed) } func (t *Ticker) Stop() { t.ticker.Stop() + t.stopChan <- struct{}{} + close(t.stopChan) } From 45c8ba4f10d631a995a51d22759daa2bd1e756d4 Mon Sep 17 00:00:00 2001 From: Eliott Bouhana Date: Mon, 10 Feb 2025 16:47:17 +0100 Subject: [PATCH 59/61] fix heartbeat flakyness Signed-off-by: Eliott Bouhana --- internal/newtelemetry/client_config.go | 4 +++ internal/newtelemetry/client_test.go | 42 ++++++++++++++++++++++++ internal/newtelemetry/internal/ticker.go | 5 +++ 3 files changed, 51 insertions(+) diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go index 98111775af..6675b47807 100644 --- a/internal/newtelemetry/client_config.go +++ b/internal/newtelemetry/client_config.go @@ -188,6 +188,10 @@ func defaultConfig(config ClientConfig) ClientConfig { // Make sure we flush at least at each heartbeat interval config.FlushInterval = config.FlushInterval.ReduceMax(config.HeartbeatInterval) + if config.HeartbeatInterval == config.FlushInterval.Max { // Since the go ticker is not exact when it comes to the interval, we need to make sure the heartbeat is actually sent + config.HeartbeatInterval = config.HeartbeatInterval - 10*time.Millisecond + } + if config.DependencyLoader == nil && globalinternal.BoolEnv("DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED", true) { config.DependencyLoader = debug.ReadBuildInfo } diff --git a/internal/newtelemetry/client_test.go b/internal/newtelemetry/client_test.go index 4fc1434813..66e2a508d5 100644 --- a/internal/newtelemetry/client_test.go +++ b/internal/newtelemetry/client_test.go @@ -1205,6 +1205,48 @@ func TestClientEnd2End(t *testing.T) { } } +func TestHeartBeatInterval(t *testing.T) { + startTime := time.Now() + payloadtimes := make([]time.Duration, 0, 32) + c, err := NewClient("test-service", "test-env", "1.0.0", ClientConfig{ + AgentURL: "http://localhost:8126", + HeartbeatInterval: 2 * time.Second, + HTTPClient: &http.Client{ + Timeout: 5 * time.Second, + Transport: &testRoundTripper{ + t: t, + roundTrip: func(_ *http.Request) (*http.Response, error) { + payloadtimes = append(payloadtimes, time.Since(startTime)) + startTime = time.Now() + return &http.Response{ + StatusCode: http.StatusOK, + }, nil + }, + }, + }, + }) + require.NoError(t, err) + defer c.Close() + + for i := 0; i < 10; i++ { + c.Log(LogError, "test") + time.Sleep(1 * time.Second) + } + + // 10 seconds have passed, we should have sent 5 heartbeats + + c.Flush() + c.Close() + + require.InDelta(t, 5, len(payloadtimes), 1) + sum := 0.0 + for _, d := range payloadtimes { + sum += d.Seconds() + } + + assert.InDelta(t, 2, sum/5, 0.1) +} + func TestSendingFailures(t *testing.T) { cfg := ClientConfig{ AgentURL: "http://localhost:8126", diff --git a/internal/newtelemetry/internal/ticker.go b/internal/newtelemetry/internal/ticker.go index 167dad5b37..4f061e623f 100644 --- a/internal/newtelemetry/internal/ticker.go +++ b/internal/newtelemetry/internal/ticker.go @@ -25,6 +25,7 @@ type Ticker struct { tickFunc TickFunc stopChan chan struct{} + stopped bool } func NewTicker(tickFunc TickFunc, interval Range[time.Duration]) *Ticker { @@ -81,7 +82,11 @@ func (t *Ticker) CanDecreaseSpeed() { } func (t *Ticker) Stop() { + if t.stopped { + return + } t.ticker.Stop() t.stopChan <- struct{}{} close(t.stopChan) + t.stopped = true } From 
146966ba0f75213fc6405904b971cec39b1661e8 Mon Sep 17 00:00:00 2001
From: Eliott Bouhana
Date: Mon, 10 Feb 2025 18:28:42 +0100
Subject: [PATCH 60/61] fix last issues mentioned by @RomainMuller

Signed-off-by: Eliott Bouhana
---
 internal/newtelemetry/client_config.go |  2 +-
 internal/newtelemetry/distributions.go |  2 +-
 internal/newtelemetry/globalclient.go  | 16 +++++++++-------
 internal/newtelemetry/metrics.go       |  2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/internal/newtelemetry/client_config.go b/internal/newtelemetry/client_config.go
index 6675b47807..d0688e4434 100644
--- a/internal/newtelemetry/client_config.go
+++ b/internal/newtelemetry/client_config.go
@@ -20,8 +20,8 @@ import (
 
 type ClientConfig struct {
 	// DependencyLoader determines how dependency data is sent via telemetry.
-	// If nil, the library should not send the app-dependencies-loaded event.
 	// The default value is [debug.ReadBuildInfo] since Application Security Monitoring uses this data to detect vulnerabilities in the ASM-SCA product
+	// To disable this feature, please implement a function that returns nil, false.
 	// This can only be controlled via the env var DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED
 	DependencyLoader func() (*debug.BuildInfo, bool)
 
diff --git a/internal/newtelemetry/distributions.go b/internal/newtelemetry/distributions.go
index d7712902a8..a556936731 100644
--- a/internal/newtelemetry/distributions.go
+++ b/internal/newtelemetry/distributions.go
@@ -41,7 +41,7 @@ func (d *distributions) LoadOrStore(namespace Namespace, name string, tags []str
 func (d *distributions) Payload() transport.Payload {
 	series := make([]transport.DistributionSeries, 0, d.store.Len())
 	d.store.Range(func(_ metricKey, handle *distribution) bool {
-		if payload := handle.payload(); payload.Metric != "" {
+		if payload := handle.payload(); payload.Namespace != "" {
 			series = append(series, payload)
 		}
 		return true
diff --git a/internal/newtelemetry/globalclient.go b/internal/newtelemetry/globalclient.go
index e661aaf189..efd156bb0c 100644
--- a/internal/newtelemetry/globalclient.go
+++ b/internal/newtelemetry/globalclient.go
@@ -66,14 +66,16 @@ func SwapClient(client Client) {
 		(*oldClient).Close()
 	}
 
-	if client != nil {
-		globalClientRecorder.Replay(client)
-		// Swap all metrics hot pointers to the new MetricHandle
-		metricsHandleSwappablePointers.Range(func(_ metricKey, value *swappableMetricHandle) bool {
-			value.swap(value.maker(client))
-			return true
-		})
+	if client == nil {
+		return
 	}
+
+	globalClientRecorder.Replay(client)
+	// Swap all metrics hot pointers to the new MetricHandle
+	metricsHandleSwappablePointers.Range(func(_ metricKey, value *swappableMetricHandle) bool {
+		value.swap(value.maker(client))
+		return true
+	})
 }
 
 // MockClient swaps the global client with the given client and clears the recorder to make sure external calls are not replayed.
diff --git a/internal/newtelemetry/metrics.go b/internal/newtelemetry/metrics.go
index b9c57fd064..0ab4c28cc0 100644
--- a/internal/newtelemetry/metrics.go
+++ b/internal/newtelemetry/metrics.go
@@ -40,7 +40,7 @@ func validateMetricKey(namespace Namespace, kind transport.MetricType, name stri
 	}
 
 	if !knownmetrics.IsKnownMetric(namespace, kind, name) {
-		return fmt.Errorf("metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that your wrote the name correctly. "+
+		return fmt.Errorf("metric name %q of kind %q in namespace %q is not a known metric, please update the list of metrics name or check that you wrote the name correctly. "+
 			"The metric will still be sent", name, string(kind), namespace)
 	}
 
From 5dc48d5190a4641dfa75f0f807f613ecfcb1d1e9 Mon Sep 17 00:00:00 2001
From: Eliott Bouhana <47679741+eliottness@users.noreply.github.com>
Date: Wed, 12 Feb 2025 10:40:07 +0000
Subject: [PATCH 61/61] Create README.md

---
 internal/newtelemetry/README.md | 43 +++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 internal/newtelemetry/README.md

diff --git a/internal/newtelemetry/README.md b/internal/newtelemetry/README.md
new file mode 100644
index 0000000000..5e940ddd32
--- /dev/null
+++ b/internal/newtelemetry/README.md
@@ -0,0 +1,43 @@
+# Instrumentation Telemetry Client
+
+This documentation details the current architecture of the Instrumentation Telemetry Client of dd-trace-go and its capabilities.
+
+### Data Flow
+
+```mermaid
+flowchart TD
+    linkStyle default interpolate basis
+    globalclient@{ shape: circle } -->|client == nil| recorder
+    globalclient -->|client != nil| client
+    recorder@{ shape: cyl } --> client@{ shape: circle }
+
+    subgraph datasources
+        integrations@{ shape: cyl }
+        configuration@{ shape: cyl }
+        dependencies@{ shape: cyl }
+        products@{ shape: cyl }
+        logs@{ shape: cyl }
+        metrics@{ shape: cyl }
+    end
+
+    client --> datasources
+
+    subgraph mapper
+        direction LR
+        app-started -->
+        default[message-batch<br>heartbeat<br>extended-heartbeat] --> app-closing
+    end
+
+    flush@{ shape:rounded }
+
+    queue@{ shape: cyl } --> flush
+
+    datasources -..->|at flush| mapper --> flush
+    flush -->|if writer fails| queue
+
+    flush --> writer
+
+    writer --> agent@{ shape: das }
+    writer --> backend@{ shape: stadium }
+    agent --> backend
+```
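
To make the data flow above concrete, here is a minimal usage sketch of the client lifecycle, based only on the API exercised by the tests in this series (NewClient, StartApp, Log, Flush, Close). The service name, agent URL, and heartbeat value are placeholder assumptions, and since the package sits under internal/ this is illustrative rather than a public-API example.

```go
package main

import (
	"time"

	"gopkg.in/DataDog/dd-trace-go.v1/internal/newtelemetry"
)

func main() {
	// Env and version may be empty since PATCH 55 removed those checks;
	// a non-empty service name and a reachable agent URL are still required
	// (see the NewClient error cases in client_test.go).
	client, err := newtelemetry.NewClient("my-service", "", "", newtelemetry.ClientConfig{
		AgentURL:          "http://localhost:8126", // placeholder agent address
		HeartbeatInterval: 10 * time.Second,        // optional; clamped by the client config
	})
	if err != nil {
		panic(err)
	}

	// Install the client globally and emit the app-started payload.
	newtelemetry.StartApp(client)

	// Telemetry logs are keyed by message and tags, then batched until the next flush.
	client.Log(newtelemetry.LogError, "something went wrong")

	// Force a flush, then release the client's resources.
	client.Flush()
	client.Close()
}
```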
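
The interval handling fixed in PATCH 54 is easier to see in isolation: Clamp bounds a value to a range, and ReduceMax lowers the upper bound of the flush interval so the client never flushes less often than it sends heartbeats. The sketch below is self-contained under stated assumptions: the cmp.Ordered constraint and the concrete durations are illustrative, while the real constraint and defaults live in internal/newtelemetry/internal/range.go and client_config.go.

```go
package main

import (
	"cmp"
	"fmt"
	"time"
)

// Range mirrors the internal helper from range.go; the cmp.Ordered constraint
// is an assumption made so this sketch compiles on its own.
type Range[T cmp.Ordered] struct {
	Min, Max T
}

// Clamp bounds value to [Min, Max], as in the original implementation.
func (r Range[T]) Clamp(value T) T { return max(min(r.Max, value), r.Min) }

// ReduceMax makes value the new Max and keeps the range ordered.
func (r Range[T]) ReduceMax(value T) Range[T] {
	return Range[T]{Min: min(r.Min, value), Max: value}
}

func main() {
	// Illustrative flush bounds and a heartbeat lowered via DD_TELEMETRY_HEARTBEAT_INTERVAL.
	flushInterval := Range[time.Duration]{Min: 15 * time.Second, Max: 60 * time.Second}
	heartbeat := 10 * time.Second

	// As in defaultConfig: flush at least as often as the heartbeat.
	flushInterval = flushInterval.ReduceMax(heartbeat)
	fmt.Println(flushInterval.Clamp(30 * time.Second)) // prints 10s: clamped to the reduced range
}
```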