diff --git a/app/app_test.go b/app/app_test.go index cb68d02878..27f16374a1 100644 --- a/app/app_test.go +++ b/app/app_test.go @@ -766,7 +766,7 @@ func TestPeerRouting_TraceLocalityDisabled(t *testing.T) { redisDB := 12 + i cfg := defaultConfig(basePort, redisDB) collectionCfg := cfg.GetCollectionConfig() - collectionCfg.DisableTraceLocality = true + collectionCfg.TraceLocalityMode = "distributed" cfg.GetCollectionConfigVal = collectionCfg apps[i], graph = newStartedApp(t, senders[i], nil, peers, cfg) diff --git a/collect/collect.go b/collect/collect.go index 5515cfcc33..f0132ab3b4 100644 --- a/collect/collect.go +++ b/collect/collect.go @@ -210,7 +210,7 @@ func (i *InMemCollector) Start() error { i.Peers.RegisterUpdatedPeersCallback(i.redistributeTimer.Reset) } - if i.Config.GetCollectionConfig().DisableTraceLocality { + if !i.Config.GetCollectionConfig().TraceLocalityEnabled() { i.keptDecisionMessages = make(chan string, decisionMessageBufferSize) i.dropDecisionMessages = make(chan string, decisionMessageBufferSize) i.PubSub.Subscribe(context.Background(), keptTraceDecisionTopic, i.signalKeptTraceDecisions) @@ -524,7 +524,7 @@ func (i *InMemCollector) redistributeTraces(ctx context.Context) { for _, sp := range trace.GetSpans() { - if i.Config.GetCollectionConfig().DisableTraceLocality { + if !i.Config.GetCollectionConfig().TraceLocalityEnabled() { dc := i.createDecisionSpan(sp, trace, newTarget) i.PeerTransmission.EnqueueEvent(dc) continue @@ -669,14 +669,7 @@ func (i *InMemCollector) processSpan(ctx context.Context, sp *types.Span, source isMyTrace bool ) // if trace locality is enabled, we should forward all spans to its correct peer - if i.Config.GetCollectionConfig().DisableTraceLocality { - targetShard, isMyTrace = i.IsMyTrace(sp.TraceID) - // if the span is a decision span and the trace no longer belong to us, we should not forward it to the peer - if !isMyTrace && sp.IsDecisionSpan() { - return - } - - } else { + if 
i.Config.GetCollectionConfig().TraceLocalityEnabled() { targetShard = i.Sharder.WhichShard(sp.TraceID) isMyTrace = true if !targetShard.Equals(i.Sharder.MyShard()) { @@ -684,6 +677,13 @@ func (i *InMemCollector) processSpan(ctx context.Context, sp *types.Span, source i.PeerTransmission.EnqueueSpan(sp) return } + } else { + targetShard, isMyTrace = i.IsMyTrace(sp.TraceID) + // if the span is a decision span and the trace no longer belong to us, we should not forward it to the peer + if !isMyTrace && sp.IsDecisionSpan() { + return + } + } tcfg := i.Config.GetTracesConfig() @@ -1069,7 +1069,7 @@ func (i *InMemCollector) Stop() error { close(i.fromPeer) close(i.outgoingTraces) - if i.Config.GetCollectionConfig().DisableTraceLocality { + if !i.Config.GetCollectionConfig().TraceLocalityEnabled() { close(i.dropDecisionBuffer) close(i.keptDecisionBuffer) } @@ -1527,7 +1527,7 @@ func (i *InMemCollector) makeDecision(ctx context.Context, trace *types.Trace, s HasRoot: hasRoot, } - if i.Config.GetCollectionConfig().DisableTraceLocality { + if !i.Config.GetCollectionConfig().TraceLocalityEnabled() { i.publishTraceDecision(ctx, td) } @@ -1535,15 +1535,15 @@ func (i *InMemCollector) makeDecision(ctx context.Context, trace *types.Trace, s } func (i *InMemCollector) IsMyTrace(traceID string) (sharder.Shard, bool) { + if i.Config.GetCollectionConfig().TraceLocalityEnabled() { + return i.Sharder.MyShard(), true + } + // if trace locality is disabled, we should only process // traces that belong to the current refinery - if i.Config.GetCollectionConfig().DisableTraceLocality { - targeShard := i.Sharder.WhichShard(traceID) - - return targeShard, i.Sharder.MyShard().Equals(targeShard) - } + targeShard := i.Sharder.WhichShard(traceID) - return i.Sharder.MyShard(), true + return targeShard, i.Sharder.MyShard().Equals(targeShard) } @@ -1578,7 +1578,7 @@ func (i *InMemCollector) publishTraceDecision(ctx context.Context, td TraceDecis } func (i *InMemCollector) sendKeptDecisions() { - if 
!i.Config.GetCollectionConfig().DisableTraceLocality { + if i.Config.GetCollectionConfig().TraceLocalityEnabled() { return } interval := time.Duration(i.Config.GetCollectionConfig().KeptDecisionSendInterval) @@ -1589,7 +1589,7 @@ func (i *InMemCollector) sendKeptDecisions() { } func (i *InMemCollector) sendDropDecisions() { - if !i.Config.GetCollectionConfig().DisableTraceLocality { + if i.Config.GetCollectionConfig().TraceLocalityEnabled() { return } interval := time.Duration(i.Config.GetCollectionConfig().DropDecisionSendInterval) diff --git a/collect/collect_test.go b/collect/collect_test.go index d063ce52b7..c96027b2da 100644 --- a/collect/collect_test.go +++ b/collect/collect_test.go @@ -98,7 +98,7 @@ func newTestCollector(conf config.Config, transmission transmit.Transmission, pe redistributeTimer: redistributeNotifier, } - if conf.GetCollectionConfig().DisableTraceLocality { + if !conf.GetCollectionConfig().TraceLocalityEnabled() { localPubSub.Subscribe(context.Background(), keptTraceDecisionTopic, c.signalKeptTraceDecisions) } @@ -196,7 +196,7 @@ func TestAddRootSpan(t *testing.T) { conf.Mux.Lock() collectionCfg := conf.GetCollectionConfigVal - collectionCfg.DisableTraceLocality = true + collectionCfg.TraceLocalityMode = "distributed" conf.GetCollectionConfigVal = collectionCfg conf.Mux.Unlock() @@ -527,8 +527,8 @@ func TestDryRunMode(t *testing.T) { DryRun: true, ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, GetCollectionConfigVal: config.CollectionConfig{ - ShutdownDelay: config.Duration(1 * time.Millisecond), - DisableTraceLocality: true, + ShutdownDelay: config.Duration(1 * time.Millisecond), + TraceLocalityMode: "distributed", }, } transmission := &transmit.MockTransmission{} @@ -2233,8 +2233,8 @@ func TestSendDropDecisions(t *testing.T) { GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, GetCollectionConfigVal: config.CollectionConfig{ - ShutdownDelay: 
config.Duration(1 * time.Millisecond), - DisableTraceLocality: true, + ShutdownDelay: config.Duration(1 * time.Millisecond), + TraceLocalityMode: "distributed", }, } transmission := &transmit.MockTransmission{} @@ -2318,7 +2318,7 @@ func TestExpiredTracesCleanup(t *testing.T) { MaxBatchSize: 1500, }, GetCollectionConfigVal: config.CollectionConfig{ - DisableTraceLocality: true, + TraceLocalityMode: "distributed", }, GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, AddSpanCountToRoot: true, diff --git a/collect/stressRelief.go b/collect/stressRelief.go index e21b855e0f..fb69b04a74 100644 --- a/collect/stressRelief.go +++ b/collect/stressRelief.go @@ -428,7 +428,7 @@ func (s *StressRelief) Recalc() uint { // If a single node is under significant stress, it can activate stress relief mode overallStressLevel := uint(math.Max(float64(clusterStressLevel), float64(localLevel))) - if s.Config.GetCollectionConfig().DisableTraceLocality { + if !s.Config.GetCollectionConfig().TraceLocalityEnabled() { overallStressLevel = clusterStressLevel } s.overallStressLevel = overallStressLevel diff --git a/collect/stress_relief_test.go b/collect/stress_relief_test.go index fcaa7b0dc1..03586b8ccd 100644 --- a/collect/stress_relief_test.go +++ b/collect/stress_relief_test.go @@ -158,7 +158,7 @@ func TestStressRelief_OverallStressLevel_DisableTraceLocality(t *testing.T) { MinimumActivationDuration: config.Duration(5 * time.Second), }, GetCollectionConfigVal: config.CollectionConfig{ - DisableTraceLocality: true, + TraceLocalityMode: "distributed", }, } // On startup, the stress relief should not be active diff --git a/config.md b/config.md index 39b2b0fc7b..383a5f09f6 100644 --- a/config.md +++ b/config.md @@ -1,7 +1,7 @@ # Honeycomb Refinery Configuration Documentation This is the documentation for the configuration file for Honeycomb's Refinery. -It was automatically generated on 2024-11-26 at 18:43:37 UTC. 
+It was automatically generated on 2024-12-02 at 16:52:55 UTC ## The Config file @@ -402,7 +402,7 @@ This is useful for evaluating sampling rules. When DryRun is enabled, traces is decorated with `meta.refinery. dryrun.kept` that is set to `true` or `false`, based on whether the trace would be kept or dropped. In addition, `SampleRate` will be set to the incoming rate for all traces, and the field `meta.refinery.dryrun.sample_rate` will be set to the sample rate that would have been used. -NOTE: This setting is not compatible with `DisableTraceLocality=true`, because drop trace decisions shared among peers do not contain all the relevant information needed to send traces to Honeycomb. +NOTE: This setting is not compatible with `TraceLocalityMode=distributed`, because drop trace decisions shared among peers do not contain all the relevant information needed to send traces to Honeycomb. - Eligible for live reload. - Type: `bool` @@ -1021,21 +1021,23 @@ This value should be set to a bit less than the normal timeout period for shutti - Type: `duration` - Default: `15s` -### `DisableTraceLocality` +### `TraceLocalityMode` -DisableTraceLocality controls whether all spans that belongs to the same trace are sent to a single Refinery for processing. +TraceLocalityMode controls how Refinery handles spans that belong to the same trace in a clustered environment. -When `false`, Refinery will route all spans that belong to the same trace to a single peer. +When `concentrated`, Refinery will route all spans that belong to the same trace to a single peer. This is the default behavior ("Trace Locality") and the way Refinery has worked in the past. -When `true`, Refinery will instead keep spans on the node where they were received, and forward proxy spans that contain only the key information needed to make a trace decision. 
-This can reduce the amount of traffic between peers in most cases, and can help avoid a situation where a single large trace can cause a memory overrun on a single node. -If `true`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery and Redis will significantly increase, because Refinery uses Redis to distribute the trace decisions to all nodes in the cluster. +When `distributed`, Refinery will instead keep spans on the node where they were received, and forward proxy spans that contain only the key information needed to make a trace decision. +This can reduce the amount of traffic between peers, and can help avoid a situation where a single large trace can cause a memory overrun on a single node. +If `distributed`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery and Redis will significantly increase, because Refinery uses Redis to distribute the trace decisions to all nodes in the cluster. It is important to adjust the size of the Redis cluster in this case. -NOTE: This setting is not compatible with `DryRun` when set to true. +The total volume of network traffic in `distributed` mode should be expected to decrease unless the cluster size is very large (hundreds of nodes). +NOTE: This setting is not compatible with `DryRun` when set to `distributed`. See `DryRun` for more information. - Not eligible for live reload. 
-- Type: `bool` +- Type: `string` +- Default: `concentrated` ### `HealthCheckTimeout` diff --git a/config/file_config.go b/config/file_config.go index 5c10b03787..3e01936ed2 100644 --- a/config/file_config.go +++ b/config/file_config.go @@ -317,8 +317,8 @@ type CollectionConfig struct { DisableRedistribution bool `yaml:"DisableRedistribution"` RedistributionDelay Duration `yaml:"RedistributionDelay" default:"30s"` - ShutdownDelay Duration `yaml:"ShutdownDelay" default:"15s"` - DisableTraceLocality bool `yaml:"DisableTraceLocality"` + ShutdownDelay Duration `yaml:"ShutdownDelay" default:"15s"` + TraceLocalityMode string `yaml:"TraceLocalityMode" default:"concentrated"` MaxDropDecisionBatchSize int `yaml:"MaxDropDecisionBatchSize" default:"1000"` DropDecisionSendInterval Duration `yaml:"DropDecisionSendInterval" default:"1s"` @@ -355,6 +355,19 @@ func (c CollectionConfig) GetIncomingQueueSize() int { return c.IncomingQueueSize } +// TraceLocalityEnabled returns whether trace locality is enabled. +func (c CollectionConfig) TraceLocalityEnabled() bool { + switch c.TraceLocalityMode { + case "concentrated": + return true + case "distributed": + return false + default: + // Default to true for backwards compatibility + return true + } +} + type BufferSizeConfig struct { UpstreamBufferSize int `yaml:"UpstreamBufferSize" default:"10_000"` PeerBufferSize int `yaml:"PeerBufferSize" default:"100_000"` diff --git a/config/metadata/configMeta.yaml b/config/metadata/configMeta.yaml index ab1e473c6c..a7ff328e73 100644 --- a/config/metadata/configMeta.yaml +++ b/config/metadata/configMeta.yaml @@ -489,7 +489,7 @@ groups: `meta.refinery.dryrun.sample_rate` will be set to the sample rate that would have been used. - NOTE: This setting is not compatible with `DisableTraceLocality=true`, + NOTE: This setting is not compatible with `TraceLocalityMode=distributed`, because drop trace decisions shared among peers do not contain all the relevant information needed to send traces to Honeycomb. 
@@ -1340,26 +1340,26 @@ groups: This value should be set to a bit less than the normal timeout period for shutting down without forcibly terminating the process. - - name: DisableTraceLocality - type: bool + - name: TraceLocalityMode + type: string valuetype: nondefault firstversion: v2.9 - default: false + default: "concentrated" reload: false - summary: controls whether all spans that belongs to the same trace are sent to a single Refinery for processing. + summary: controls how Refinery handles spans that belong to the same trace in a clustered environment. description: > - When `false`, Refinery will route all spans that belong to the same trace to a single peer. This is the - default behavior ("Trace Locality") and the way Refinery has worked in the past. When `true`, Refinery + When `concentrated`, Refinery will route all spans that belong to the same trace to a single peer. This is the + default behavior ("Trace Locality") and the way Refinery has worked in the past. When `distributed`, Refinery will instead keep spans on the node where they were received, and forward proxy spans that contain only - the key information needed to make a trace decision. This can reduce the amount of traffic between peers - in most cases, and can help avoid a situation where a single large trace can cause a memory overrun on - a single node. + the key information needed to make a trace decision. This can reduce the amount of traffic between peers, + and can help avoid a situation where a single large trace can cause a memory overrun on a single node. - If `true`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery + If `distributed`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery and Redis will significantly increase, because Refinery uses Redis to distribute the trace decisions to all nodes in the cluster. It is important to adjust the size of the Redis cluster in this case. 
- NOTE: This setting is not compatible with `DryRun` when set to true. + The total volume of network traffic in `distributed` mode should be expected to decrease unless the cluster size is very large (hundreds of nodes). + NOTE: This setting is not compatible with `DryRun` when set to `distributed`. See `DryRun` for more information. - name: HealthCheckTimeout type: duration diff --git a/config_complete.yaml b/config_complete.yaml index 1b1339c664..3a1224f4b4 100644 --- a/config_complete.yaml +++ b/config_complete.yaml @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created on 2024-11-22 at 17:59:57 UTC from ../../config.yaml using a template generated on 2024-11-22 at 17:59:50 UTC +# created on 2024-12-02 at 16:40:51 UTC from ../../config.yaml using a template generated on 2024-12-02 at 16:40:45 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -427,7 +427,7 @@ Debugging: ## to the incoming rate for all traces, and the field ## `meta.refinery.dryrun.sample_rate` will be set to the sample rate that ## would have been used. - ## NOTE: This setting is not compatible with `DisableTraceLocality=true`, + ## NOTE: This setting is not compatible with `TraceLocalityMode=distributed`, ## because drop trace decisions shared among peers do not contain all the ## relevant information needed to send traces to Honeycomb. ## @@ -1083,27 +1083,30 @@ Collection: ## Eligible for live reload. # ShutdownDelay: 15s - ## DisableTraceLocality controls whether all spans that belongs to the - ## same trace are sent to a single Refinery for processing. - ## - ## When `false`, Refinery will route all spans that belong to the same - ## trace to a single peer. This is the default behavior ("Trace - ## Locality") and the way Refinery has worked in the past. 
When `true`, - ## Refinery will instead keep spans on the node where they were received, - ## and forward proxy spans that contain only the key information needed - ## to make a trace decision. This can reduce the amount of traffic - ## between peers in most cases, and can help avoid a situation where a - ## single large trace can cause a memory overrun on a single node. - ## If `true`, the amount of traffic between peers will be reduced, but - ## the amount of traffic between Refinery and Redis will significantly - ## increase, because Refinery uses Redis to distribute the trace - ## decisions to all nodes in the cluster. It is important to adjust the - ## size of the Redis cluster in this case. - ## NOTE: This setting is not compatible with `DryRun` when set to true. - ## See `DryRun` for more information. - ## - ## Not eligible for live reload. - # DisableTraceLocality: false + ## TraceLocalityMode controls how Refinery handles spans that belong to + ## the same trace in a clustered environment. + ## + ## When `concentrated`, Refinery will route all spans that belong to the + ## same trace to a single peer. This is the default behavior ("Trace + ## Locality") and the way Refinery has worked in the past. When + ## `distributed`, Refinery will instead keep spans on the node where they + ## were received, and forward proxy spans that contain only the key + ## information needed to make a trace decision. This can reduce the + ## amount of traffic between peers, and can help avoid a situation where + ## a single large trace can cause a memory overrun on a single node. + ## If `distributed`, the amount of traffic between peers will be reduced, + ## but the amount of traffic between Refinery and Redis will + ## significantly increase, because Refinery uses Redis to distribute the + ## trace decisions to all nodes in the cluster. It is important to adjust + ## the size of the Redis cluster in this case. 
+ ## The total volume of network traffic in `distributed` mode should be + ## expected to decrease unless the cluster size is very large (hundreds + ## of nodes). NOTE: This setting is not compatible with `DryRun` when set + ## to `distributed`. See `DryRun` for more information. + ## + ## default: concentrated + ## Not eligible for live reload. + # TraceLocalityMode: concentrated ## HealthCheckTimeout controls the maximum duration allowed for ## collection health checks to complete. diff --git a/refinery_config.md b/refinery_config.md index 0c89ceee32..a569ef67ef 100644 --- a/refinery_config.md +++ b/refinery_config.md @@ -381,7 +381,7 @@ This is useful for evaluating sampling rules. When DryRun is enabled, traces is decorated with `meta.refinery. dryrun.kept` that is set to `true` or `false`, based on whether the trace would be kept or dropped. In addition, `SampleRate` will be set to the incoming rate for all traces, and the field `meta.refinery.dryrun.sample_rate` will be set to the sample rate that would have been used. -NOTE: This setting is not compatible with `DisableTraceLocality=true`, because drop trace decisions shared among peers do not contain all the relevant information needed to send traces to Honeycomb. +NOTE: This setting is not compatible with `TraceLocalityMode=distributed`, because drop trace decisions shared among peers do not contain all the relevant information needed to send traces to Honeycomb. - Eligible for live reload. - Type: `bool` @@ -1006,21 +1006,23 @@ This value should be set to a bit less than the normal timeout period for shutti - Type: `duration` - Default: `15s` -### `DisableTraceLocality` +### `TraceLocalityMode` -`DisableTraceLocality` controls whether all spans that belongs to the same trace are sent to a single Refinery for processing. +`TraceLocalityMode` controls how Refinery handles spans that belong to the same trace in a clustered environment. 
-When `false`, Refinery will route all spans that belong to the same trace to a single peer. +When `concentrated`, Refinery will route all spans that belong to the same trace to a single peer. This is the default behavior ("Trace Locality") and the way Refinery has worked in the past. -When `true`, Refinery will instead keep spans on the node where they were received, and forward proxy spans that contain only the key information needed to make a trace decision. -This can reduce the amount of traffic between peers in most cases, and can help avoid a situation where a single large trace can cause a memory overrun on a single node. -If `true`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery and Redis will significantly increase, because Refinery uses Redis to distribute the trace decisions to all nodes in the cluster. +When `distributed`, Refinery will instead keep spans on the node where they were received, and forward proxy spans that contain only the key information needed to make a trace decision. +This can reduce the amount of traffic between peers, and can help avoid a situation where a single large trace can cause a memory overrun on a single node. +If `distributed`, the amount of traffic between peers will be reduced, but the amount of traffic between Refinery and Redis will significantly increase, because Refinery uses Redis to distribute the trace decisions to all nodes in the cluster. It is important to adjust the size of the Redis cluster in this case. -NOTE: This setting is not compatible with `DryRun` when set to true. +The total volume of network traffic in `distributed` mode should be expected to decrease unless the cluster size is very large (hundreds of nodes). +NOTE: This setting is not compatible with `DryRun` when set to `distributed`. See `DryRun` for more information. - Not eligible for live reload. 
-- Type: `bool` +- Type: `string` +- Default: `concentrated` ### `HealthCheckTimeout` diff --git a/route/route.go b/route/route.go index 34b63e6e2a..75e202e056 100644 --- a/route/route.go +++ b/route/route.go @@ -614,7 +614,7 @@ func (r *Router) processEvent(ev *types.Event, reqID interface{}) error { } } - if !r.Config.GetCollectionConfig().DisableTraceLocality { + if r.Config.GetCollectionConfig().TraceLocalityEnabled() { // Figure out if we should handle this span locally or pass on to a peer targetShard := r.Sharder.WhichShard(traceID) if !targetShard.Equals(r.Sharder.MyShard()) { diff --git a/tools/convert/configDataNames.txt b/tools/convert/configDataNames.txt index 618c6d22da..a3e2eb6a41 100644 --- a/tools/convert/configDataNames.txt +++ b/tools/convert/configDataNames.txt @@ -1,5 +1,5 @@ # Names of groups and fields in the new config file format. -# Automatically generated on 2024-11-22 at 17:59:51 UTC. +# Automatically generated on 2024-12-02 at 16:40:46 UTC. General: - ConfigurationVersion @@ -194,7 +194,7 @@ Collection: - ShutdownDelay - - DisableTraceLocality + - TraceLocalityMode - HealthCheckTimeout diff --git a/tools/convert/minimal_config.yaml b/tools/convert/minimal_config.yaml index a5aa01bf81..feeb4c8913 100644 --- a/tools/convert/minimal_config.yaml +++ b/tools/convert/minimal_config.yaml @@ -1,5 +1,5 @@ # sample uncommented config file containing all possible fields -# automatically generated on 2024-11-22 at 18:35:03 UTC +# automatically generated on 2024-12-02 at 16:40:47 UTC General: ConfigurationVersion: 2 MinRefineryVersion: "v2.0" @@ -103,7 +103,7 @@ Collection: DisableRedistribution: false RedistributionDelay: 30s ShutdownDelay: 15s - DisableTraceLocality: false + TraceLocalityMode: concentrated HealthCheckTimeout: 3s BufferSizes: UpstreamBufferSize: 10_000 diff --git a/tools/convert/templates/configV2.tmpl b/tools/convert/templates/configV2.tmpl index 7fce333f70..03f934829d 100644 --- a/tools/convert/templates/configV2.tmpl +++ 
b/tools/convert/templates/configV2.tmpl @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created {{ now }} from {{ .Input }} using a template generated on 2024-11-22 at 17:59:50 UTC +# created {{ now }} from {{ .Input }} using a template generated on 2024-12-02 at 16:40:45 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -425,7 +425,7 @@ Debugging: ## to the incoming rate for all traces, and the field ## `meta.refinery.dryrun.sample_rate` will be set to the sample rate that ## would have been used. - ## NOTE: This setting is not compatible with `DisableTraceLocality=true`, + ## NOTE: This setting is not compatible with `TraceLocalityMode=distributed`, ## because drop trace decisions shared among peers do not contain all the ## relevant information needed to send traces to Honeycomb. ## @@ -1078,27 +1078,30 @@ Collection: ## Eligible for live reload. {{ nonDefaultOnly .Data "ShutdownDelay" "ShutdownDelay" "15s" }} - ## DisableTraceLocality controls whether all spans that belongs to the - ## same trace are sent to a single Refinery for processing. - ## - ## When `false`, Refinery will route all spans that belong to the same - ## trace to a single peer. This is the default behavior ("Trace - ## Locality") and the way Refinery has worked in the past. When `true`, - ## Refinery will instead keep spans on the node where they were received, - ## and forward proxy spans that contain only the key information needed - ## to make a trace decision. This can reduce the amount of traffic - ## between peers in most cases, and can help avoid a situation where a - ## single large trace can cause a memory overrun on a single node. 
- ## If `true`, the amount of traffic between peers will be reduced, but - ## the amount of traffic between Refinery and Redis will significantly - ## increase, because Refinery uses Redis to distribute the trace - ## decisions to all nodes in the cluster. It is important to adjust the - ## size of the Redis cluster in this case. - ## NOTE: This setting is not compatible with `DryRun` when set to true. - ## See `DryRun` for more information. - ## - ## Not eligible for live reload. - {{ nonDefaultOnly .Data "DisableTraceLocality" "DisableTraceLocality" false }} + ## TraceLocalityMode controls how Refinery handles spans that belong to + ## the same trace in a clustered environment. + ## + ## When `concentrated`, Refinery will route all spans that belong to the + ## same trace to a single peer. This is the default behavior ("Trace + ## Locality") and the way Refinery has worked in the past. When + ## `distributed`, Refinery will instead keep spans on the node where they + ## were received, and forward proxy spans that contain only the key + ## information needed to make a trace decision. This can reduce the + ## amount of traffic between peers, and can help avoid a situation where + ## a single large trace can cause a memory overrun on a single node. + ## If `distributed`, the amount of traffic between peers will be reduced, + ## but the amount of traffic between Refinery and Redis will + ## significantly increase, because Refinery uses Redis to distribute the + ## trace decisions to all nodes in the cluster. It is important to adjust + ## the size of the Redis cluster in this case. + ## The total volume of network traffic in `distributed` mode should be + ## expected to decrease unless the cluster size is very large (hundreds + ## of nodes). NOTE: This setting is not compatible with `DryRun` when set + ## to `distributed`. See `DryRun` for more information. + ## + ## default: concentrated + ## Not eligible for live reload. 
+ {{ nonDefaultOnly .Data "TraceLocalityMode" "TraceLocalityMode" "concentrated" }} ## HealthCheckTimeout controls the maximum duration allowed for ## collection health checks to complete.