From 380699a80c12de997a5516870591a8b30dbafd54 Mon Sep 17 00:00:00 2001 From: Mike Beaumont Date: Thu, 24 Oct 2024 17:53:27 +0200 Subject: [PATCH] feat(kuma-cp): add option to disable KDS traces (#11847) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation These traces are potentially not useful and too long-lived. ## Implementation information Added option to leave the status quo or completely disable KDS traces. Note that Envoy admin operations should not be affected. Signed-off-by: Mike Beaumont Signed-off-by: Kürşat Aktaş --- docs/generated/raw/kuma-cp.yaml | 4 ++++ pkg/config/app/kuma-cp/kuma-cp.defaults.yaml | 4 ++++ pkg/config/loader_test.go | 4 ++++ pkg/config/multizone/kds.go | 1 + pkg/config/multizone/multicluster.go | 3 +++ pkg/config/multizone/tracing.go | 7 +++++++ pkg/kds/mux/server.go | 5 ++++- 7 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 pkg/config/multizone/tracing.go diff --git a/docs/generated/raw/kuma-cp.yaml b/docs/generated/raw/kuma-cp.yaml index 5d5589cfc5fb..11496e58d309 100644 --- a/docs/generated/raw/kuma-cp.yaml +++ b/docs/generated/raw/kuma-cp.yaml @@ -516,6 +516,10 @@ multizone: # Response backoff is a time Global CP waits before sending ACK/NACK. # This is a way to slow down Zone CP from sending resources too often. responseBackoff: 0s # ENV: KUMA_MULTIZONE_GLOBAL_KDS_RESPONSE_BACKOFF + tracing: + # Defines whether tracing is enabled for all gRPC methods + # of GlobalKDSServer and KDSSyncService or completely disabled + enabled: true # ENV: KUMA_MULTIZONE_GLOBAL_KDS_TRACING_ENABLED zone: # Kuma Zone name used to mark the zone dataplane resources name: "default" # ENV: KUMA_MULTIZONE_ZONE_NAME diff --git a/pkg/config/app/kuma-cp/kuma-cp.defaults.yaml b/pkg/config/app/kuma-cp/kuma-cp.defaults.yaml index 5d5589cfc5fb..11496e58d309 100644 --- a/pkg/config/app/kuma-cp/kuma-cp.defaults.yaml +++ b/pkg/config/app/kuma-cp/kuma-cp.defaults.yaml @@ -516,6 +516,10 @@ multizone: # Response backoff is a time Global CP waits before sending ACK/NACK. # This is a way to slow down Zone CP from sending resources too often. responseBackoff: 0s # ENV: KUMA_MULTIZONE_GLOBAL_KDS_RESPONSE_BACKOFF + tracing: + # Defines whether tracing is enabled for all gRPC methods + # of GlobalKDSServer and KDSSyncService or completely disabled + enabled: true # ENV: KUMA_MULTIZONE_GLOBAL_KDS_TRACING_ENABLED zone: # Kuma Zone name used to mark the zone dataplane resources name: "default" # ENV: KUMA_MULTIZONE_ZONE_NAME diff --git a/pkg/config/loader_test.go b/pkg/config/loader_test.go index cffc4d432a6b..17fbfb3c5185 100644 --- a/pkg/config/loader_test.go +++ b/pkg/config/loader_test.go @@ -266,6 +266,7 @@ var _ = Describe("Config loader", func() { Expect(cfg.Multizone.Global.KDS.ResponseBackoff.Duration).To(Equal(time.Second)) Expect(cfg.Multizone.Global.KDS.ZoneHealthCheck.PollInterval.Duration).To(Equal(11 * time.Second)) Expect(cfg.Multizone.Global.KDS.ZoneHealthCheck.Timeout.Duration).To(Equal(110 * time.Second)) + Expect(cfg.Multizone.Global.KDS.Tracing.Enabled).To(BeFalse()) Expect(cfg.Multizone.Zone.GlobalAddress).To(Equal("grpc://1.1.1.1:5685")) Expect(cfg.Multizone.Zone.Name).To(Equal("zone-1")) Expect(cfg.Multizone.Zone.KDS.RootCAFile).To(Equal("/rootCa")) @@ -621,6 +622,8 @@ multizone: zoneHealthCheck: pollInterval: 11s timeout: 110s + tracing: + enabled: false zone: globalAddress: "grpc://1.1.1.1:5685" name: "zone-1" @@ -964,6 +967,7 @@ meshService: "KUMA_MULTIZONE_GLOBAL_KDS_RESPONSE_BACKOFF": "1s", "KUMA_MULTIZONE_GLOBAL_KDS_ZONE_HEALTH_CHECK_POLL_INTERVAL": "11s", "KUMA_MULTIZONE_GLOBAL_KDS_ZONE_HEALTH_CHECK_TIMEOUT": "110s", + "KUMA_MULTIZONE_GLOBAL_KDS_TRACING_ENABLED": "false", "KUMA_MULTIZONE_ZONE_GLOBAL_ADDRESS": "grpc://1.1.1.1:5685", "KUMA_MULTIZONE_ZONE_NAME": "zone-1", "KUMA_MULTIZONE_ZONE_KDS_ROOT_CA_FILE": "/rootCa", diff --git a/pkg/config/multizone/kds.go b/pkg/config/multizone/kds.go index 8995f23f9be0..94730eb89cc5 100644 --- a/pkg/config/multizone/kds.go +++ b/pkg/config/multizone/kds.go @@ -42,6 +42,7 @@ type KdsServerConfig struct { ResponseBackoff config_types.Duration `json:"responseBackoff" envconfig:"kuma_multizone_global_kds_response_backoff"` // ZoneHealthCheck holds config for ensuring zones are online ZoneHealthCheck ZoneHealthCheckConfig `json:"zoneHealthCheck"` + Tracing KDSServerTracing `json:"tracing"` } var _ config.Config = &KdsServerConfig{} diff --git a/pkg/config/multizone/multicluster.go b/pkg/config/multizone/multicluster.go index e6494466a913..f15f0dc65cfb 100644 --- a/pkg/config/multizone/multicluster.go +++ b/pkg/config/multizone/multicluster.go @@ -46,6 +46,9 @@ func DefaultGlobalConfig() *GlobalConfig { TlsMinVersion: "TLSv1_2", TlsCipherSuites: []string{}, NackBackoff: config_types.Duration{Duration: 5 * time.Second}, + Tracing: KDSServerTracing{ + Enabled: true, + }, }, } } diff --git a/pkg/config/multizone/tracing.go b/pkg/config/multizone/tracing.go new file mode 100644 index 000000000000..c3945b73acd9 --- /dev/null +++ b/pkg/config/multizone/tracing.go @@ -0,0 +1,7 @@ +package multizone + +type KDSServerTracing struct { + // Defines whether tracing is enabled for all gRPC methods + // of GlobalKDSServer and KDSSyncService or completely disabled + Enabled bool `json:"enabled,omitempty" envconfig:"kuma_multizone_global_kds_tracing_enabled"` +} diff --git a/pkg/kds/mux/server.go b/pkg/kds/mux/server.go index 686dad19381b..8890ac0e21d3 100644 --- a/pkg/kds/mux/server.go +++ b/pkg/kds/mux/server.go @@ -110,8 +110,11 @@ func (s *server) Start(stop <-chan struct{}) error { grpcOptions = append( grpcOptions, grpc.ChainUnaryInterceptor(s.unaryInterceptors...), - grpc.StatsHandler(otelgrpc.NewServerHandler()), ) + if s.config.Tracing.Enabled { + grpcOptions = append(grpcOptions, grpc.StatsHandler(otelgrpc.NewServerHandler())) + } + grpcServer := grpc.NewServer(grpcOptions...) mesh_proto.RegisterGlobalKDSServiceServer(grpcServer, s.serviceServer)