Merge pull request #53 from ksrt12/main

feat: add podTopologySpreadConstraints and deployment annotation
blind-oracle · Nov 13, 2023 · 10d6c2d · 10d6c2d
2 parents 1f3c391 + 92d2564
commit 10d6c2d
Show file tree

Hide file tree

Showing 3 changed files with 148 additions and 100 deletions.
diff --git a/deploy/k8s/chart/README.md b/deploy/k8s/chart/README.md
@@ -8,51 +8,52 @@ A Helm Chart for cortex-tenant
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| affinity | object | `{}` |  |
-| autoscaling.enabled | bool | `true` |  |
-| autoscaling.maxReplica | int | `3` |  |
-| autoscaling.minReplica | int | `1` |  |
-| autoscaling.targetCPUUtilizationPercentage | int | `50` |  |
-| autoscaling.targetMemoryAverageValue | string | `"100Mi"` |  |
-| config.auth.enabled | bool | `false` |  |
-| config.auth.existingSecret | string | `nil` |  |
-| config.auth.password | string | `nil` |  |
-| config.auth.username | string | `nil` |  |
-| config.concurrency | int | `1000` |  |
-| config.enable_ipv6 | bool | `false` |  |
-| config.listen | string | `"0.0.0.0:8080"` |  |
-| config.listen_pprof | string | `"0.0.0.0:7008"` |  |
-| config.log_level | string | `"warn"` |  |
-| config.log_response_errors | bool | `true` |  |
-| config.max_connection_duration | string | `"0s"` |  |
-| config.metadata | bool | `false` |  |
-| config.target | string | `"http://cortex-distributor.cortex.svc:8080/api/v1/push"` |  |
-| config.tenant.accept_all | bool | `false` |  |
-| config.tenant.default | string | `"cortex-tenant-default"` |  |
-| config.tenant.header | string | `"X-Scope-OrgID"` |  |
-| config.tenant.label | string | `"tenant"` |  |
-| config.tenant.label_remove | bool | `false` |  |
-| config.tenant.prefix | string | `""` |  |
-| config.timeout | string | `"10s"` |  |
-| config.timeout_shutdown | string | `"10s"` |  |
-| envs | string | `nil` |  |
-| fullnameOverride | string | `nil` |  |
-| image.pullPolicy | string | `"IfNotPresent"` |  |
-| image.repository | string | `"ghcr.io/blind-oracle/cortex-tenant"` |  |
-| image.tag | string | `""` |  |
-| nameOverride | string | `nil` |  |
-| nodeSelector | object | `{}` |  |
-| podAnnotations | object | `{}` |  |
-| podDisruptionBudget.enabled | bool | `true` |  |
-| podDisruptionBudget.minAvailable | int | `1` |  |
-| podSecurityContext | object | `{}` |  |
-| resources.limits.memory | string | `"256Mi"` |  |
-| resources.requests.cpu | string | `"100m"` |  |
-| resources.requests.memory | string | `"128Mi"` |  |
-| securityContext | object | `{}` |  |
-| service.port | int | `8080` |  |
+| affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) |
+| annotations | object | `{}` | Annotations for deployment |
+| autoscaling.enabled | bool | `true` | If enabled, HorizontalPodAutoscaler resources are created |
+| autoscaling.maxReplica | int | `3` | Max number of pod replica autoscaled |
+| autoscaling.minReplica | int | `1` | Min number of pod replica autoscaled |
+| autoscaling.targetCPUUtilizationPercentage | int | `50` | Target CPU utilization percentage for autoscaling |
+| autoscaling.targetMemoryAverageValue | string | `"100Mi"` | Target memory average value for autoscaling |
+| config.auth.enabled | bool | `false` | Egress HTTP basic auth -> add `Authorization` header to outgoing requests |
+| config.auth.existingSecret | string | `nil` | Secret should pass the `CT_AUTH_EGRESS_USERNAME` and `CT_AUTH_EGRESS_PASSWORD` env variables |
+| config.auth.password | string | `nil` | Password (env: `CT_AUTH_EGRESS_PASSWORD`) |
+| config.auth.username | string | `nil` | Username (env: `CT_AUTH_EGRESS_USERNAME`) |
+| config.concurrency | int | `1000` | Max number of parallel incoming HTTP requests to handle (env: `CT_CONCURRENCY`) |
+| config.enable_ipv6 | bool | `false` | Whether to enable querying for IPv6 records (env: `CT_ENABLE_IPV6`) |
+| config.listen | string | `"0.0.0.0:8080"` | Where to listen for incoming write requests from Prometheus (env: `CT_LISTEN`) |
+| config.listen_pprof | string | `"0.0.0.0:7008"` | Profiling API, leave empty to disable (env: `CT_LISTEN_PPROF`) |
+| config.log_level | string | `"warn"` | Log level (env: `CT_LOG_LEVEL`) |
+| config.log_response_errors | bool | `true` | If true response codes from metrics backend will be logged to stdout. This setting can be used to suppress errors which can be quite verbose like 400 code - out-of-order samples or 429 on hitting ingestion limits Also, those are already reported by other services like Cortex/Mimir distributors and ingesters (env: `CT_LOG_RESPONSE_ERRORS`) |
+| config.max_connection_duration | string | `"0s"` | Maximum duration to keep outgoing connections alive (to Cortex/Mimir) Useful for resetting L4 load-balancer state Use 0 to keep them indefinitely (env: `CT_MAX_CONN_DURATION`) |
+| config.metadata | bool | `false` | Whether to forward metrics metadata from Prometheus to Cortex Since metadata requests have no timeseries in them - we cannot divide them into tenants So the metadata requests will be sent to the default tenant only, if one is not defined - they will be dropped (env: `CT_METADATA`) |
+| config.target | string | `"http://cortex-distributor.cortex.svc:8080/api/v1/push"` | Where to send the modified requests (Cortex) (env: `CT_TARGET`) |
+| config.tenant.accept_all | bool | `false` | Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code regardless of the response from Cortex. This can lose metrics if Cortex is throwing rejections. (env: `CT_TENANT_ACCEPT_ALL`) |
+| config.tenant.default | string | `"cortex-tenant-default"` | Which tenant ID to use if the label is missing in any of the timeseries If this is not set or empty then the write request with missing tenant label will be rejected with HTTP code 400 (env: `CT_TENANT_DEFAULT`) |
+| config.tenant.header | string | `"X-Scope-OrgID"` | To which header to add the tenant ID (env: `CT_TENANT_HEADER`) |
+| config.tenant.label | string | `"tenant"` | Which label to look for the tenant information (env: `CT_TENANT_LABEL`) |
+| config.tenant.label_remove | bool | `false` | Whether to remove the tenant label from the request (env: `CT_TENANT_LABEL_REMOVE`) |
+| config.tenant.prefix | string | `""` | Optional hard-coded prefix with delimiter for all tenant values. Delimiters allowed for use: https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/ (env: `CT_TENANT_PREFIX`) |
+| config.timeout | string | `"10s"` | HTTP request timeout (env: `CT_TIMEOUT`) |
+| config.timeout_shutdown | string | `"10s"` | Timeout to wait on shutdown to allow load balancers to detect that we're going away. During this period after the shutdown command the /alive endpoint will reply with HTTP 503. Set to 0s to disable. (env: `CT_TIMEOUT_SHUTDOWN`) |
+| envs | list | `[]` | Additional environment variables |
+| fullnameOverride | string | `nil` | Application fullname override |
+| image.pullPolicy | string | `"IfNotPresent"` | Policy when pulling images |
+| image.repository | string | `"ghcr.io/blind-oracle/cortex-tenant"` | Repository to pull the image |
+| image.tag | string | `""` | Overrides the image tag (default is `.Chart.AppVersion`) |
+| nameOverride | string | `nil` | Application name override |
+| nodeSelector | object | `{}` | [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) |
+| podAnnotations | object | `{}` | Annotations for pods |
+| podDisruptionBudget.enabled | bool | `true` | If enabled, PodDisruptionBudget resources are created |
+| podDisruptionBudget.minAvailable | int | `1` | Minimum number of pods that must remain scheduled |
+| podSecurityContext | object | `{}` | [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context) |
+| podTopologySpreadConstraints | list | `[]` | [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/) |
+| resources.limits | object | `{"memory":"256Mi"}` | Resources limits |
+| resources.requests | object | `{"cpu":"100m","memory":"128Mi"}` | Resources requests |
+| securityContext | object | `{}` | [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context) |
+| service.port | int | `8080` | The port on which the service listens for traffic |
 | service.targetPort | int | `8080` |  |
-| service.type | string | `"ClusterIP"` |  |
+| service.type | string | `"ClusterIP"` | The type of service |
 | serviceMonitor.annotations | object | `{}` | ServiceMonitor annotations |
 | serviceMonitor.enabled | bool | `false` | If enabled, ServiceMonitor resources for Prometheus Operator are created |
 | serviceMonitor.interval | string | `nil` | ServiceMonitor scrape interval |
@@ -65,5 +66,9 @@ A Helm Chart for cortex-tenant
 | serviceMonitor.scheme | string | `"http"` | ServiceMonitor will use http by default, but you can pick https as well |
 | serviceMonitor.scrapeTimeout | string | `nil` | ServiceMonitor scrape timeout in Go duration format (e.g. 15s) |
 | serviceMonitor.targetLabels | list | `[]` | ServiceMonitor will add labels from the service to the Prometheus metric https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitorspec |
+| serviceMonitor.targetPort | int | `9090` |  |
 | serviceMonitor.tlsConfig | string | `nil` | ServiceMonitor will use these tlsConfig settings to make the health check requests |
-| tolerations | list | `[]` |  |
+| tolerations | list | `[]` | [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) |
+
+----------------------------------------------
+Autogenerated from chart metadata using [helm-docs v1.11.2](https://github.com/norwoodj/helm-docs/releases/v1.11.2)
diff --git a/deploy/k8s/chart/templates/deployment.yaml b/deploy/k8s/chart/templates/deployment.yaml
@@ -4,6 +4,10 @@ metadata:
   labels:
     {{- include "cortex-tenant.labels" . | nindent 4 }}
   name: {{ include "cortex-tenant.fullname" . }}
+  {{- with .Values.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
 spec:
   selector:
     matchLabels:
@@ -12,7 +16,7 @@ spec:
     metadata:
       annotations:
         {{- with .Values.podAnnotations }}
-        {{- toYaml .Values.podAnnotations  | nindent 8 }}
+        {{- toYaml . | nindent 8 }}
         {{- end }}
       labels:
         {{- include "cortex-tenant.selectorLabels" . | nindent 8 }}
@@ -67,3 +71,7 @@ spec:
       tolerations:
         {{- toYaml . | nindent 8 }}
       {{- end }}
+      {{- with .Values.podTopologySpreadConstraints }}
+      topologySpreadConstraints:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deploy/k8s/chart/values.yaml b/deploy/k8s/chart/values.yaml
@@ -1,128 +1,163 @@
+# -- Application name override
 nameOverride:
+# -- Application fullname override
 fullnameOverride:
 
 image:
-  repository: ghcr.io/blind-oracle/cortex-tenant # registry to pull
-  pullPolicy: IfNotPresent # policy when pulling images
-  tag: "" # Overrides the image tag (default is `.Chart.appVersion`)
+  # -- Repository to pull the image
+  repository: ghcr.io/blind-oracle/cortex-tenant
+  # -- Policy when pulling images
+  pullPolicy: IfNotPresent
+  # -- Overrides the image tag (default is `.Chart.AppVersion`)
+  tag: ""
 
 service:
+  # -- The type of service
   type: ClusterIP
+  # -- The port on which the service listens for traffic
   port: 8080
+  # -- The target port to which traffic is forwarded
   targetPort: 8080
 
 autoscaling:
-  enabled: true # If HorizontalPodAutoscaler must be enabled
-  minReplica: 1 # Min number of pod replica autoscaled
-  maxReplica: 3 # Max number of pod replica autoscaled
+  # -- If enabled, HorizontalPodAutoscaler resources are created
+  enabled: true
+  # -- Min number of pod replica autoscaled
+  minReplica: 1
+  # -- Max number of pod replica autoscaled
+  maxReplica: 3
+  # -- Target memory average value for autoscaling
   targetMemoryAverageValue: 100Mi
+  # -- Target CPU utilization percentage for autoscaling
   targetCPUUtilizationPercentage: 50
 
+# -- Additional environment variables
 envs:
+  []
+  # - name: CT_LISTEN
+  #   value:
 
 config:
-  # Where to listen for incoming write requests from Prometheus
-  # env: CT_LISTEN
+  # -- Where to listen for incoming write requests from Prometheus
+  # (env: `CT_LISTEN`)
   listen: 0.0.0.0:8080
-  # Profiling API, leave empty to disable
-  # env: CT_LISTEN_PPROF
+  # -- Profiling API, leave empty to disable
+  # (env: `CT_LISTEN_PPROF`)
   listen_pprof: 0.0.0.0:7008
-  # Where to send the modified requests (Cortex)
-  # env: CT_TARGET
+  # -- Where to send the modified requests (Cortex)
+  # (env: `CT_TARGET`)
   target: http://cortex-distributor.cortex.svc:8080/api/v1/push
-  # Whether to enable querying for IPv6 records
-  # env: CT_ENABLE_IPV6
+  # -- Whether to enable querying for IPv6 records
+  # (env: `CT_ENABLE_IPV6`)
   enable_ipv6: false
-  # Log level
-  # env: CT_LOG_LEVEL
+  # -- Log level
+  # (env: `CT_LOG_LEVEL`)
   log_level: warn
-  # HTTP request timeout
-  # env: CT_TIMEOUT
+  # -- HTTP request timeout
+  # (env: `CT_TIMEOUT`)
   timeout: 10s
-  # Timeout to wait on shutdown to allow load balancers detect that we're going away.
+  # -- Timeout to wait on shutdown to allow load balancers to detect that we're going away.
   # During this period after the shutdown command the /alive endpoint will reply with HTTP 503.
   # Set to 0s to disable.
-  # env: CT_TIMEOUT_SHUTDOWN
+  # (env: `CT_TIMEOUT_SHUTDOWN`)
   timeout_shutdown: 10s
-  # Max number of parallel incoming HTTP requests to handle
-  # env: CT_CONCURRENCY
+  # -- Max number of parallel incoming HTTP requests to handle
+  # (env: `CT_CONCURRENCY`)
   concurrency: 1000
-  # Whether to forward metrics metadata from Prometheus to Cortex
+  # -- Whether to forward metrics metadata from Prometheus to Cortex
   # Since metadata requests have no timeseries in them - we cannot divide them into tenants
   # So the metadata requests will be sent to the default tenant only, if one is not defined - they will be dropped
-  # env: CT_METADATA
+  # (env: `CT_METADATA`)
   metadata: false
-  # If true response codes from metrics backend will be logged to stdout. This setting can be used to suppress errors
+  # -- If true response codes from metrics backend will be logged to stdout. This setting can be used to suppress errors
   # which can be quite verbose like 400 code - out-of-order samples or 429 on hitting ingestion limits
   # Also, those are already reported by other services like Cortex/Mimir distributors and ingesters
-  # env: CT_LOG_RESPONSE_ERRORS
+  # (env: `CT_LOG_RESPONSE_ERRORS`)
   log_response_errors: true
-  # Maximum duration to keep outgoing connections alive (to Cortex/Mimir)
+  # -- Maximum duration to keep outgoing connections alive (to Cortex/Mimir)
   # Useful for resetting L4 load-balancer state
   # Use 0 to keep them indefinitely
-  # env: CT_MAX_CONN_DURATION
+  # (env: `CT_MAX_CONN_DURATION`)
   max_connection_duration: 0s
 
   # Authentication (optional)
   auth:
-    # Egress HTTP basic auth -> add `Authentication` header to outgoing requests
+    # -- Egress HTTP basic auth -> add `Authorization` header to outgoing requests
     enabled: false
-    # env: CT_AUTH_EGRESS_USERNAME
-    # env: CT_AUTH_EGRESS_PASSWORD
+    # -- Username
+    # (env: `CT_AUTH_EGRESS_USERNAME`)
     username:
+    # -- Password
+    # (env: `CT_AUTH_EGRESS_PASSWORD`)
     password:
-    # Secret should pass the CT_AUTH_EGRESS_USERNAME and CT_AUTH_EGRESS_PASSWORD env variables
+    # -- Secret should pass the `CT_AUTH_EGRESS_USERNAME` and `CT_AUTH_EGRESS_PASSWORD` env variables
     existingSecret:
 
   tenant:
-    # Which label to look for the tenant information
-    # env: CT_TENANT_LABEL
+    # -- Which label to look for the tenant information
+    # (env: `CT_TENANT_LABEL`)
     label: tenant
-    # Optional hard-coded prefix with delimeter for all tenant values.
+    # -- Optional hard-coded prefix with delimiter for all tenant values.
     # Delimiters allowed for use:
     # https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/
-    # env: CT_TENANT_PREFIX
+    # (env: `CT_TENANT_PREFIX`)
     prefix: ""
-    # Whether to remove the tenant label from the request
-    # env: CT_TENANT_LABEL_REMOVE
+    # -- Whether to remove the tenant label from the request
+    # (env: `CT_TENANT_LABEL_REMOVE`)
     label_remove: false
-    # To which header to add the tenant ID
-    # env: CT_TENANT_HEADER
+    # -- To which header to add the tenant ID
+    # (env: `CT_TENANT_HEADER`)
     header: X-Scope-OrgID
-    # Which tenant ID to use if the label is missing in any of the timeseries
+    # -- Which tenant ID to use if the label is missing in any of the timeseries
     # If this is not set or empty then the write request with missing tenant label
     # will be rejected with HTTP code 400
-    # env: CT_TENANT_DEFAULT
+    # (env: `CT_TENANT_DEFAULT`)
     default: cortex-tenant-default
-    # Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code
+    # -- Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code
     # regardless of the response from Cortex. This can lose metrics if Cortex is
     # throwing rejections.
-    # env: CT_TENANT_ACCEPT_ALL
+    # (env: `CT_TENANT_ACCEPT_ALL`)
     accept_all: false
 
-resources: # Resource limits and requests for simu
+resources:
+  # -- Resources limits
   limits:
     # cpu: 100m
     memory: 256Mi
+  # -- Resources requests
   requests:
     cpu: 100m
     memory: 128Mi
 
 podDisruptionBudget:
-  enabled: true # If Pod disruption must be enabled
-  minAvailable: 1 # Number of min pods that must remain available
+  # -- If enabled, PodDisruptionBudget resources are created
+  enabled: true
+  # -- Minimum number of pods that must remain scheduled
+  minAvailable: 1
 
-podAnnotations: {} # Annotations for pods
+# -- Annotations for deployment
+annotations: {}
 
-podSecurityContext: {} # [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
+# -- Annotations for pods
+podAnnotations: {}
 
-securityContext: {} # [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
+# -- [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
+podSecurityContext: {}
 
-nodeSelector: {} # [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
+# -- [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
+securityContext: {}
 
-tolerations: [] # [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
+# -- [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
+nodeSelector: {}
 
-affinity: {} # [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
+# -- [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
+tolerations: []
+
+# -- [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity)
+affinity: {}
+
+# -- [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
+podTopologySpreadConstraints: []
 
 # ServiceMonitor configuration
 serviceMonitor:
@@ -163,4 +198,4 @@ serviceMonitor:
     enabled: false
     additionalLabels: {}
     # namespace:
-    rules: []
+    rules: []