From 5f6a703460656cb3ec033c3f639ce5d6f7e695a7 Mon Sep 17 00:00:00 2001 From: Alejandro Pedraza Date: Wed, 22 Nov 2023 15:39:29 -0500 Subject: [PATCH] Add ability to configure client-go's `QPS` and `Burst` settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem and Symptoms When having a very large number of proxies request identity in a short period of time (e.g. during large node scaling events), the identity controller will attempt to validate the tokens sent by the proxies at a rate surpassing client-go's default request rate threshold, triggering client-side throttling, which will delay the proxies' initialization, and can even fail their startup (after a 2m timeout). The identity controller will surface this through log entries like this: ``` time="2023-11-08T19:50:45Z" level=error msg="error validating token for web.emojivoto.serviceaccount.identity.linkerd.cluster.local: client rate limiter Wait returned an error: rate: Wait(n=1) would exceed context deadline" ``` ## Solution Client-go's default `QPS` is 5 and `Burst` is 10. This PR exposes those settings as entries in `values.yaml` with defaults of 100 and 200 respectively. Note this only applies to the identity controller, as it's the only controller performing direct requests to the `kube-apiserver` in a hot path. The other controllers mostly rely on informers, and direct calls are sporadic. ## Observability The `QPS` and `Burst` settings used are exposed both as a log entry as soon as the controller starts, and in the new metric gauges `http_client_qps` and `http_client_burst`. ## Testing You can use the following K6 script, which simulates 6k calls to the `Certify` service during one minute from emojivoto's web pod. Before running this you need to: - Put the identity.proto and [all the other proto files](https://github.com/linkerd/linkerd2-proxy-api/tree/v0.11.0/proto) in the same directory. 
- Edit the [checkRequest](https://github.com/linkerd/linkerd2/blob/edge-23.11.3/pkg/identity/service.go#L266) function and add logging statements to figure the `token` and `csr` entries you can use here, that will be shown as soon as a web pod starts. ```javascript import { Client, Stream } from 'k6/experimental/grpc'; import { sleep } from 'k6'; const client = new Client(); client.load(['.'], 'identity.proto'); // This always holds: // req_num = (1 / req_duration ) * duration * VUs // Given req_duration (0.5s) test duration (1m) and the target req_num (6k), we // can solve for the required VUs: // VUs = req_num * req_duration / duration // VUs = 6000 * 0.5 / 60 = 50 export const options = { scenarios: { identity: { executor: 'constant-vus', vus: 50, duration: '1m', }, }, }; export default () => { client.connect('localhost:8080', { plaintext: true, }); const stream = new Stream(client, 'io.linkerd.proxy.identity.Identity/Certify'); // Replace with your own token let token = "ZXlKaGJHY2lPaUpTVXpJMU5pSXNJbXRwWkNJNkluQjBaV1pUZWtaNWQyVm5OMmxmTTBkV2VUTlhWSFpqTmxwSmJYRmtNMWRSVEhwNVNHWllhUzFaZDNNaWZRLmV5SmhkV1FpT2xzaWFXUmxiblJwZEhrdWJEVmtMbWx2SWwwc0ltVjRjQ0k2TVRjd01EWTRPVFk1TUN3aWFXRjBJam94TnpBd05qQXpNamt3TENKcGMzTWlPaUpvZEhSd2N6b3ZMMnQxWW1WeWJtVjBaWE11WkdWbVlYVnNkQzV6ZG1NdVkyeDFjM1JsY2k1c2IyTmhiQ0lzSW10MVltVnlibVYwWlhNdWFXOGlPbnNpYm1GdFpYTndZV05sSWpvaVpXMXZhbWwyYjNSdklpd2ljRzlrSWpwN0ltNWhiV1VpT2lKM1pXSXRPRFUxT1dJNU4yWTNZeTEwYldJNU5TSXNJblZwWkNJNklqaGlZbUV5WWpsbExXTXdOVGN0TkRnMk1TMWhNalZsTFRjelpEY3dOV1EzWmpoaU1TSjlMQ0p6WlhKMmFXTmxZV05qYjNWdWRDSTZleUp1WVcxbElqb2lkMlZpSWl3aWRXbGtJam9pWm1JelpUQXlNRE10TmpZMU55MDBOMk0xTFRoa09EUXRORGt6WXpBM1lXUTJaak0zSW4xOUxDSnVZbVlpT2pFM01EQTJNRE15T1RBc0luTjFZaUk2SW5ONWMzUmxiVHB6WlhKMmFXTmxZV05qYjNWdWREcGxiVzlxYVhadmRHODZkMlZpSW4wLnlwMzAzZVZkeHhpamxBOG1wVjFObGZKUDB3SC03RmpUQl9PcWJ3NTNPeGU1cnNTcDNNNk96VWR6OFdhYS1hcjNkVVhQR2x2QXRDRVU2RjJUN1lKUFoxVmxxOUFZZTNvV2YwOXUzOWRodUU1ZDhEX21JUl9rWDUxY193am9UcVlORHA5ZzZ4ZFJNcW9reGg3NE9GNXFjaEFhRGtENUJNZVZ6a2
5kUWZtVVZwME5BdTdDMTZ3UFZWSlFmNlVXRGtnYkI1SW9UQXpxSmcyWlpyNXBBY3F5enJ0WE1rRkhSWmdvYUxVam5sN1FwX0ljWm8yYzJWWk03T2QzRjIwcFZaVzJvejlOdGt3THZoSEhSMkc5WlNJQ3RHRjdhTkYwNVR5ZC1UeU1BVnZMYnM0ZFl1clRYaHNORjhQMVk4RmFuNjE4d0x6ZUVMOUkzS1BJLUctUXRUNHhWdw=="; // Replace with your own CSR let csr = "MIIBWjCCAQECAQAwRjFEMEIGA1UEAxM7d2ViLmVtb2ppdm90by5zZXJ2aWNlYWNjb3VudC5pZGVudGl0eS5saW5rZXJkLmNsdXN0ZXIubG9jYWwwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATKjgVXu6F+WCda3Bbq2ue6m3z6OTMfQ4Vnmekmvirip/XGyi2HbzRzjARnIzGlG8wo4EfeYBtd2MBCb50kP8F8oFkwVwYJKoZIhvcNAQkOMUowSDBGBgNVHREEPzA9gjt3ZWIuZW1vaml2b3RvLnNlcnZpY2VhY2NvdW50LmlkZW50aXR5LmxpbmtlcmQuY2x1c3Rlci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiAM7aXY8MRs/EOhtPo4+PRHuiNOV+nsmNDv5lvtJt8T+QIgFP5JAq0iq7M6ShRNkRG99ZquJ3L3TtLWMNVTPvqvvUE="; const data = { identity: "web.emojivoto.serviceaccount.identity.linkerd.cluster.local", token: token, certificate_signing_request: csr, }; stream.write(data); // This request takes around 2ms, so this sleep will mostly determine its final duration sleep(0.5); }; ``` This results in the following report: ``` scenarios: (100.00%) 1 scenario, 50 max VUs, 1m30s max duration (incl. 
graceful stop): * identity: 50 looping VUs for 1m0s (gracefulStop: 30s) data_received................: 6.3 MB 104 kB/s data_sent....................: 9.4 MB 156 kB/s grpc_req_duration............: avg=2.14ms min=873.93µs med=1.9ms max=12.89ms p(90)=3.13ms p(95)=3.86ms grpc_streams.................: 6000 99.355331/s grpc_streams_msgs_received...: 6000 99.355331/s grpc_streams_msgs_sent.......: 6000 99.355331/s iteration_duration...........: avg=503.16ms min=500.8ms med=502.64ms max=532.36ms p(90)=504.05ms p(95)=505.72ms iterations...................: 6000 99.355331/s vus..........................: 50 min=50 max=50 vus_max......................: 50 min=50 max=50 running (1m00.4s), 00/50 VUs, 6000 complete and 0 interrupted iterations ``` With the old defaults (QPS=5 and Burst=10), the latencies would be much higher and the number of completed requests much lower. --- charts/linkerd-control-plane/README.md | 2 ++ .../templates/identity.yaml | 2 ++ charts/linkerd-control-plane/values.yaml | 8 ++++++ ...install_controlplane_tracing_output.golden | 4 +++ cli/cmd/testdata/install_custom_domain.golden | 4 +++ .../testdata/install_custom_registry.golden | 4 +++ cli/cmd/testdata/install_default.golden | 4 +++ ...stall_default_override_dst_get_nets.golden | 4 +++ cli/cmd/testdata/install_default_token.golden | 4 +++ cli/cmd/testdata/install_ha_output.golden | 4 +++ .../install_ha_with_overrides_output.golden | 4 +++ .../install_heartbeat_disabled_output.golden | 4 +++ .../install_helm_control_plane_output.golden | 4 +++ ...nstall_helm_control_plane_output_ha.golden | 4 +++ .../install_helm_output_ha_labels.golden | 4 +++ ...l_helm_output_ha_namespace_selector.golden | 4 +++ .../testdata/install_no_init_container.golden | 4 +++ cli/cmd/testdata/install_output.golden | 4 +++ cli/cmd/testdata/install_proxy_ignores.golden | 4 +++ cli/cmd/testdata/install_values_file.golden | 4 +++ controller/cmd/identity/main.go | 10 ++++++- controller/k8s/api.go | 4 +-- controller/webhook/launcher.go 
| 2 +- multicluster/cmd/check.go | 4 +-- pkg/charts/linkerd2/values.go | 2 ++ pkg/k8s/api.go | 20 ++++++++++++-- pkg/prometheus/prometheus.go | 26 +++++++++++++++++-- 27 files changed, 138 insertions(+), 10 deletions(-) diff --git a/charts/linkerd-control-plane/README.md b/charts/linkerd-control-plane/README.md index 811941f095a08..4ae85d35ee2e5 100644 --- a/charts/linkerd-control-plane/README.md +++ b/charts/linkerd-control-plane/README.md @@ -175,6 +175,8 @@ Kubernetes: `>=1.21.0-0` | identity.issuer.tls | object | `{"crtPEM":"","keyPEM":""}` | Which scheme is used for the identity issuer secret format | | identity.issuer.tls.crtPEM | string | `""` | Issuer certificate (ECDSA). It must be provided during install. | | identity.issuer.tls.keyPEM | string | `""` | Key for the issuer certificate (ECDSA). It must be provided during install | +| identity.kubeAPIClientBurst | int | `200` | Burst value over kubeAPIClientQPS | +| identity.kubeAPIClientQPS | int | `100` | Maximum QPS sent to the kube-apiserver before throttling. See https://github.com/kubernetes/client-go/blob/v12.0.0/util/flowcontrol/throttle.go | | identity.serviceAccountTokenProjection | bool | `true` | Use [Service Account token Volume projection](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#service-account-token-volume-projection) for pod validation instead of the default token | | identityTrustAnchorsPEM | string | `""` | Trust root certificate (ECDSA). It must be provided during install. 
| | identityTrustDomain | string | clusterDomain | Trust domain used for identity | diff --git a/charts/linkerd-control-plane/templates/identity.yaml b/charts/linkerd-control-plane/templates/identity.yaml index b22357f019592..d7fbeb6fb6d28 100644 --- a/charts/linkerd-control-plane/templates/identity.yaml +++ b/charts/linkerd-control-plane/templates/identity.yaml @@ -159,6 +159,8 @@ spec: - -identity-clock-skew-allowance={{.Values.identity.issuer.clockSkewAllowance}} - -identity-scheme={{.Values.identity.issuer.scheme}} - -enable-pprof={{.Values.enablePprof | default false}} + - -kube-apiclient-qps={{.Values.identity.kubeAPIClientQPS}} + - -kube-apiclient-burst={{.Values.identity.kubeAPIClientBurst}} {{- include "partials.linkerd.trace" . | nindent 8 -}} env: - name: LINKERD_DISABLED diff --git a/charts/linkerd-control-plane/values.yaml b/charts/linkerd-control-plane/values.yaml index 0fb82cc2f3819..396ac399e5395 100644 --- a/charts/linkerd-control-plane/values.yaml +++ b/charts/linkerd-control-plane/values.yaml @@ -326,6 +326,14 @@ identity: # -- Use [Service Account token Volume projection](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#service-account-token-volume-projection) for pod validation instead of the default token serviceAccountTokenProjection: true + + # -- Maximum QPS sent to the kube-apiserver before throttling. 
+ # See https://github.com/kubernetes/client-go/blob/v12.0.0/util/flowcontrol/throttle.go + kubeAPIClientQPS: 100 + + # -- Burst value over kubeAPIClientQPS + kubeAPIClientBurst: 200 + issuer: scheme: linkerd.io/tls diff --git a/cli/cmd/testdata/install_controlplane_tracing_output.golden b/cli/cmd/testdata/install_controlplane_tracing_output.golden index ebcd0b21dac44..06ad3b0847f9b 100644 --- a/cli/cmd/testdata/install_controlplane_tracing_output.golden +++ b/cli/cmd/testdata/install_controlplane_tracing_output.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 - -trace-collector=collector.linkerd-jaeger.svc.cluster.local:55678 env: - name: LINKERD_DISABLED diff --git a/cli/cmd/testdata/install_custom_domain.golden b/cli/cmd/testdata/install_custom_domain.golden index 0306f1a465e7a..0f39cc5925d05 100644 --- a/cli/cmd/testdata/install_custom_domain.golden +++ b/cli/cmd/testdata/install_custom_domain.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_custom_registry.golden 
b/cli/cmd/testdata/install_custom_registry.golden index cb07eb109bfa0..b1cb0bc2499c4 100644 --- a/cli/cmd/testdata/install_custom_registry.golden +++ b/cli/cmd/testdata/install_custom_registry.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_default.golden b/cli/cmd/testdata/install_default.golden index 0306f1a465e7a..0f39cc5925d05 100644 --- a/cli/cmd/testdata/install_default.golden +++ b/cli/cmd/testdata/install_default.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_default_override_dst_get_nets.golden b/cli/cmd/testdata/install_default_override_dst_get_nets.golden index a355cf87ffa1c..9dbb9a5e3c9c7 100644 --- a/cli/cmd/testdata/install_default_override_dst_get_nets.golden +++ b/cli/cmd/testdata/install_default_override_dst_get_nets.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + 
kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_default_token.golden b/cli/cmd/testdata/install_default_token.golden index 332d1a68874f5..32921e4fc368b 100644 --- a/cli/cmd/testdata/install_default_token.golden +++ b/cli/cmd/testdata/install_default_token.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: false identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_ha_output.golden b/cli/cmd/testdata/install_ha_output.golden index 21162d710c681..fb1df897720ac 100644 --- a/cli/cmd/testdata/install_ha_output.golden +++ b/cli/cmd/testdata/install_ha_output.golden @@ -543,6 +543,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: @@ -930,6 +932,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff 
--git a/cli/cmd/testdata/install_ha_with_overrides_output.golden b/cli/cmd/testdata/install_ha_with_overrides_output.golden index f1dcae1f530f1..389a60a8c5955 100644 --- a/cli/cmd/testdata/install_ha_with_overrides_output.golden +++ b/cli/cmd/testdata/install_ha_with_overrides_output.golden @@ -543,6 +543,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: @@ -930,6 +932,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_heartbeat_disabled_output.golden b/cli/cmd/testdata/install_heartbeat_disabled_output.golden index d0ced48d3849f..994a0440d055e 100644 --- a/cli/cmd/testdata/install_heartbeat_disabled_output.golden +++ b/cli/cmd/testdata/install_heartbeat_disabled_output.golden @@ -456,6 +456,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -790,6 +792,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_helm_control_plane_output.golden b/cli/cmd/testdata/install_helm_control_plane_output.golden index 9ac6c65276378..66ec93eed187b 100644 --- a/cli/cmd/testdata/install_helm_control_plane_output.golden +++ b/cli/cmd/testdata/install_helm_control_plane_output.golden @@ 
-514,6 +514,8 @@ data: scheme: linkerd.io/tls tls: crtPEM: test-crt-pem + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -832,6 +834,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_helm_control_plane_output_ha.golden b/cli/cmd/testdata/install_helm_control_plane_output_ha.golden index 66f3ef2a3962a..1d044871bed70 100644 --- a/cli/cmd/testdata/install_helm_control_plane_output_ha.golden +++ b/cli/cmd/testdata/install_helm_control_plane_output_ha.golden @@ -532,6 +532,8 @@ data: scheme: linkerd.io/tls tls: crtPEM: test-crt-pem + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: @@ -903,6 +905,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_helm_output_ha_labels.golden b/cli/cmd/testdata/install_helm_output_ha_labels.golden index f976a0a4e19b6..946b6b4ef05e4 100644 --- a/cli/cmd/testdata/install_helm_output_ha_labels.golden +++ b/cli/cmd/testdata/install_helm_output_ha_labels.golden @@ -532,6 +532,8 @@ data: scheme: linkerd.io/tls tls: crtPEM: test-crt-pem + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: @@ -911,6 +913,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot 
block the identity controller" diff --git a/cli/cmd/testdata/install_helm_output_ha_namespace_selector.golden b/cli/cmd/testdata/install_helm_output_ha_namespace_selector.golden index f6d39a2a3d938..0188dddc8dec9 100644 --- a/cli/cmd/testdata/install_helm_output_ha_namespace_selector.golden +++ b/cli/cmd/testdata/install_helm_output_ha_namespace_selector.golden @@ -527,6 +527,8 @@ data: scheme: linkerd.io/tls tls: crtPEM: test-crt-pem + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: @@ -893,6 +895,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_no_init_container.golden b/cli/cmd/testdata/install_no_init_container.golden index fcea6e8714f69..7370f235f9e5a 100644 --- a/cli/cmd/testdata/install_no_init_container.golden +++ b/cli/cmd/testdata/install_no_init_container.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_output.golden b/cli/cmd/testdata/install_output.golden index 9d14e1d1a4174..651c0c0dcde0c 100644 --- a/cli/cmd/testdata/install_output.golden +++ b/cli/cmd/testdata/install_output.golden @@ -518,6 +518,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 
51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -835,6 +837,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_proxy_ignores.golden b/cli/cmd/testdata/install_proxy_ignores.golden index 81dee954ee422..7bca2bbfbbba2 100644 --- a/cli/cmd/testdata/install_proxy_ignores.golden +++ b/cli/cmd/testdata/install_proxy_ignores.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/cli/cmd/testdata/install_values_file.golden b/cli/cmd/testdata/install_values_file.golden index 6451a20bfd251..96e299a2fc4ef 100644 --- a/cli/cmd/testdata/install_values_file.golden +++ b/cli/cmd/testdata/install_values_file.golden @@ -525,6 +525,8 @@ data: AiAtuoI5XuCtrGVRzSmRTl2ra28aV9MyTU7d5qnTAFHKSgIgRKCvluOSgA5O21p5 51tdrmkHEZRr0qlLSJdHYgEfMzk= -----END CERTIFICATE----- + kubeAPIClientBurst: 200 + kubeAPIClientQPS: 100 serviceAccountTokenProjection: true identityProxyResources: null identityResources: null @@ -859,6 +861,8 @@ spec: - -identity-clock-skew-allowance=20s - -identity-scheme=linkerd.io/tls - -enable-pprof=false + - -kube-apiclient-qps=100 + - -kube-apiclient-burst=200 env: - name: 
LINKERD_DISABLED value: "linkerd-await cannot block the identity controller" diff --git a/controller/cmd/identity/main.go b/controller/cmd/identity/main.go index 465dfe941de1b..8c12421158e87 100644 --- a/controller/cmd/identity/main.go +++ b/controller/cmd/identity/main.go @@ -40,6 +40,8 @@ func Main(args []string) { identityIssuanceLifeTime := cmd.String("identity-issuance-lifetime", "", "the amount of time for which the Identity issuer should certify identity") identityClockSkewAllowance := cmd.String("identity-clock-skew-allowance", "", "the amount of time to allow for clock skew within a Linkerd cluster") enablePprof := cmd.Bool("enable-pprof", false, "Enable pprof endpoints on the admin server") + qps := cmd.Float64("kube-apiclient-qps", 100, "Maximum QPS sent to the kube-apiserver before throttling") + burst := cmd.Int("kube-apiclient-burst", 200, "Burst value over kube-apiclient-qps") issuerPath := cmd.String("issuer", "/var/run/linkerd/identity/issuer", @@ -137,10 +139,16 @@ func Main(args []string) { // // Create k8s API // - k8sAPI, err := k8s.NewAPI(*kubeConfigPath, "", "", []string{}, 0) + config, err := k8s.GetConfig(*kubeConfigPath, "") + if err != nil { + log.Fatalf("Error configuring Kubernetes API client: %s", err) + } + k8sAPI, err := k8s.NewAPIForConfig(config, "", []string{}, 0, float32(*qps), *burst) if err != nil { log.Fatalf("Failed to load kubeconfig: %s: %s", *kubeConfigPath, err) } + log.Infof("Using k8s client with QPS=%.2f Burst=%d", config.QPS, config.Burst) + v, err := idctl.NewK8sTokenValidator(ctx, k8sAPI, dom) if err != nil { log.Fatalf("Failed to initialize identity service: %s", err) diff --git a/controller/k8s/api.go b/controller/k8s/api.go index 64902e31aeff8..e3f800359c419 100644 --- a/controller/k8s/api.go +++ b/controller/k8s/api.go @@ -79,7 +79,7 @@ func InitializeAPI(ctx context.Context, kubeConfig string, ensureClusterWideAcce return nil, err } - k8sClient, err := k8s.NewAPIForConfig(config, "", []string{}, 0) + k8sClient, 
err := k8s.NewAPIForConfig(config, "", []string{}, 0, 0, 0) if err != nil { return nil, err } @@ -89,7 +89,7 @@ func InitializeAPI(ctx context.Context, kubeConfig string, ensureClusterWideAcce // InitializeAPIForConfig creates Kubernetes clients and returns an initialized API wrapper. func InitializeAPIForConfig(ctx context.Context, kubeConfig *rest.Config, ensureClusterWideAccess bool, cluster string, resources ...APIResource) (*API, error) { - k8sClient, err := k8s.NewAPIForConfig(kubeConfig, "", []string{}, 0) + k8sClient, err := k8s.NewAPIForConfig(kubeConfig, "", []string{}, 0, 0, 0) if err != nil { return nil, err } diff --git a/controller/webhook/launcher.go b/controller/webhook/launcher.go index c653cb62b7122..2dade0cd21d4d 100644 --- a/controller/webhook/launcher.go +++ b/controller/webhook/launcher.go @@ -44,7 +44,7 @@ func Launch( log.Fatalf("error building Kubernetes API config: %s", err) } - k8sAPI, err := pkgk8s.NewAPIForConfig(config, "", []string{}, 0) + k8sAPI, err := pkgk8s.NewAPIForConfig(config, "", []string{}, 0, 0, 0) if err != nil { //nolint:gocritic log.Fatalf("error configuring Kubernetes API client: %s", err) diff --git a/multicluster/cmd/check.go b/multicluster/cmd/check.go index 46624569a2d9d..622ef9cbe738c 100644 --- a/multicluster/cmd/check.go +++ b/multicluster/cmd/check.go @@ -352,7 +352,7 @@ func (hc *healthChecker) checkRemoteClusterConnectivity(ctx context.Context) err errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %w", secret.Namespace, secret.Name, link.TargetClusterName, err)) continue } - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, healthcheck.RequestTimeout) + remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, healthcheck.RequestTimeout, 0, 0) if err != nil { errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %w", secret.Namespace, secret.Name, link.TargetClusterName, err)) 
continue @@ -400,7 +400,7 @@ func (hc *healthChecker) checkRemoteClusterAnchors(ctx context.Context, localAnc errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, link.TargetClusterName, err)) continue } - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, healthcheck.RequestTimeout) + remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, healthcheck.RequestTimeout, 0, 0) if err != nil { errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, link.TargetClusterName, err)) continue diff --git a/pkg/charts/linkerd2/values.go b/pkg/charts/linkerd2/values.go index 791318a45a61b..e73ec08fda3c7 100644 --- a/pkg/charts/linkerd2/values.go +++ b/pkg/charts/linkerd2/values.go @@ -231,6 +231,8 @@ type ( Identity struct { ExternalCA bool `json:"externalCA"` ServiceAccountTokenProjection bool `json:"serviceAccountTokenProjection"` + KubeAPIClientQPS float32 `json:"kubeAPIClientQPS"` + KubeAPIClientBurst int `json:"kubeAPIClientBurst"` Issuer *Issuer `json:"issuer"` } diff --git a/pkg/k8s/api.go b/pkg/k8s/api.go index c555155dc1764..85a4ac22920a1 100644 --- a/pkg/k8s/api.go +++ b/pkg/k8s/api.go @@ -50,17 +50,33 @@ func NewAPI(configPath, kubeContext string, impersonate string, impersonateGroup if err != nil { return nil, fmt.Errorf("error configuring Kubernetes API client: %w", err) } - return NewAPIForConfig(config, impersonate, impersonateGroup, timeout) + return NewAPIForConfig(config, impersonate, impersonateGroup, timeout, 0, 0) } // NewAPIForConfig uses a Kubernetes config to construct a client for accessing // the configured cluster -func NewAPIForConfig(config *rest.Config, impersonate string, impersonateGroup []string, timeout time.Duration) (*KubernetesAPI, error) { +func NewAPIForConfig( + config *rest.Config, + impersonate string, + impersonateGroup []string, + 
timeout time.Duration, + qps float32, + burst int, +) (*KubernetesAPI, error) { // k8s' client-go doesn't support injecting context // https://github.com/kubernetes/kubernetes/issues/46503 // but we can set the timeout manually config.Timeout = timeout + if qps > 0 && burst > 0 { + config.QPS = qps + config.Burst = burst + prometheus.SetClientQPS("k8s", config.QPS) + prometheus.SetClientBurst("k8s", config.Burst) + } else { + prometheus.SetClientQPS("k8s", rest.DefaultQPS) + prometheus.SetClientBurst("k8s", rest.DefaultBurst) + } wt := config.WrapTransport config.WrapTransport = prometheus.ClientWithTelemetry("k8s", wt) diff --git a/pkg/prometheus/prometheus.go b/pkg/prometheus/prometheus.go index d883cbe71d373..27c6fe4dbc401 100644 --- a/pkg/prometheus/prometheus.go +++ b/pkg/prometheus/prometheus.go @@ -78,12 +78,26 @@ var ( }, []string{"client"}, ) + clientQPS = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "http_client_qps", + Help: "Max QPS used for the client config.", + }, + []string{"client"}, + ) + clientBurst = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "http_client_burst", + Help: "Burst used for the client config.", + }, + []string{"client"}, + ) ) func init() { prometheus.MustRegister( - serverCounter, serverLatency, serverResponseSize, - clientCounter, clientLatency, clientInFlight, + serverCounter, serverLatency, serverResponseSize, clientCounter, + clientLatency, clientInFlight, clientQPS, clientBurst, ) } @@ -127,3 +141,11 @@ func ClientWithTelemetry(name string, wt func(http.RoundTripper) http.RoundTripp ) } } + +func SetClientQPS(name string, qps float32) { + clientQPS.With(prometheus.Labels{"client": name}).Set(float64(qps)) +} + +func SetClientBurst(name string, burst int) { + clientBurst.With(prometheus.Labels{"client": name}).Set(float64(burst)) +}