Skip to content

Commit

Permalink
Merge pull request #53 from ksrt12/main
Browse files Browse the repository at this point in the history
feat: add podTopologySpreadConstraints and deployment annotation
  • Loading branch information
blind-oracle authored Nov 13, 2023
2 parents 1f3c391 + 92d2564 commit 10d6c2d
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 100 deletions.
95 changes: 50 additions & 45 deletions deploy/k8s/chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,52 @@ A Helm Chart for cortex-tenant

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| autoscaling.enabled | bool | `true` | |
| autoscaling.maxReplica | int | `3` | |
| autoscaling.minReplica | int | `1` | |
| autoscaling.targetCPUUtilizationPercentage | int | `50` | |
| autoscaling.targetMemoryAverageValue | string | `"100Mi"` | |
| config.auth.enabled | bool | `false` | |
| config.auth.existingSecret | string | `nil` | |
| config.auth.password | string | `nil` | |
| config.auth.username | string | `nil` | |
| config.concurrency | int | `1000` | |
| config.enable_ipv6 | bool | `false` | |
| config.listen | string | `"0.0.0.0:8080"` | |
| config.listen_pprof | string | `"0.0.0.0:7008"` | |
| config.log_level | string | `"warn"` | |
| config.log_response_errors | bool | `true` | |
| config.max_connection_duration | string | `"0s"` | |
| config.metadata | bool | `false` | |
| config.target | string | `"http://cortex-distributor.cortex.svc:8080/api/v1/push"` | |
| config.tenant.accept_all | bool | `false` | |
| config.tenant.default | string | `"cortex-tenant-default"` | |
| config.tenant.header | string | `"X-Scope-OrgID"` | |
| config.tenant.label | string | `"tenant"` | |
| config.tenant.label_remove | bool | `false` | |
| config.tenant.prefix | string | `""` | |
| config.timeout | string | `"10s"` | |
| config.timeout_shutdown | string | `"10s"` | |
| envs | string | `nil` | |
| fullnameOverride | string | `nil` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"ghcr.io/blind-oracle/cortex-tenant"` | |
| image.tag | string | `""` | |
| nameOverride | string | `nil` | |
| nodeSelector | object | `{}` | |
| podAnnotations | object | `{}` | |
| podDisruptionBudget.enabled | bool | `true` | |
| podDisruptionBudget.minAvailable | int | `1` | |
| podSecurityContext | object | `{}` | |
| resources.limits.memory | string | `"256Mi"` | |
| resources.requests.cpu | string | `"100m"` | |
| resources.requests.memory | string | `"128Mi"` | |
| securityContext | object | `{}` | |
| service.port | int | `8080` | |
| affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) |
| annotations | object | `{}` | Annotations for deployment |
| autoscaling.enabled | bool | `true` | If enabled, HorizontalPodAutoscaler resources are created |
| autoscaling.maxReplica | int | `3` | Max number of pod replica autoscaled |
| autoscaling.minReplica | int | `1` | Min number of pod replica autoscaled |
| autoscaling.targetCPUUtilizationPercentage | int | `50` | Target CPU utilization percentage for autoscaling |
| autoscaling.targetMemoryAverageValue | string | `"100Mi"` | Target memory average value for autoscaling |
| config.auth.enabled | bool | `false` | Egress HTTP basic auth -> add `Authorization` header to outgoing requests |
| config.auth.existingSecret | string | `nil` | Secret should pass the `CT_AUTH_EGRESS_USERNAME` and `CT_AUTH_EGRESS_PASSWORD` env variables |
| config.auth.password | string | `nil` | Password (env: `CT_AUTH_EGRESS_PASSWORD`) |
| config.auth.username | string | `nil` | Username (env: `CT_AUTH_EGRESS_USERNAME`) |
| config.concurrency | int | `1000` | Max number of parallel incoming HTTP requests to handle (env: `CT_CONCURRENCY`) |
| config.enable_ipv6 | bool | `false` | Whether to enable querying for IPv6 records (env: `CT_ENABLE_IPV6`) |
| config.listen | string | `"0.0.0.0:8080"` | Where to listen for incoming write requests from Prometheus (env: `CT_LISTEN`) |
| config.listen_pprof | string | `"0.0.0.0:7008"` | Profiling API, leave empty to disable (env: `CT_LISTEN_PPROF`) |
| config.log_level | string | `"warn"` | Log level (env: `CT_LOG_LEVEL`) |
| config.log_response_errors | bool | `true` | If true, response codes from the metrics backend will be logged to stdout. This setting can be used to suppress errors which can be quite verbose, like 400 (out-of-order samples) or 429 (hitting ingestion limits). Also, those are already reported by other services like Cortex/Mimir distributors and ingesters (env: `CT_LOG_RESPONSE_ERRORS`) |
| config.max_connection_duration | string | `"0s"` | Maximum duration to keep outgoing connections alive (to Cortex/Mimir). Useful for resetting L4 load-balancer state. Use 0 to keep them indefinitely (env: `CT_MAX_CONN_DURATION`) |
| config.metadata | bool | `false` | Whether to forward metrics metadata from Prometheus to Cortex. Since metadata requests have no timeseries in them, we cannot divide them into tenants, so the metadata requests will be sent to the default tenant only; if one is not defined, they will be dropped (env: `CT_METADATA`) |
| config.target | string | `"http://cortex-distributor.cortex.svc:8080/api/v1/push"` | Where to send the modified requests (Cortex) (env: `CT_TARGET`) |
| config.tenant.accept_all | bool | `false` | Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code regardless of the response from Cortex. This can lose metrics if Cortex is throwing rejections. (env: `CT_TENANT_ACCEPT_ALL`) |
| config.tenant.default | string | `"cortex-tenant-default"` | Which tenant ID to use if the label is missing in any of the timeseries. If this is not set or empty, then the write request with missing tenant label will be rejected with HTTP code 400 (env: `CT_TENANT_DEFAULT`) |
| config.tenant.header | string | `"X-Scope-OrgID"` | To which header to add the tenant ID (env: `CT_TENANT_HEADER`) |
| config.tenant.label | string | `"tenant"` | Which label to look for the tenant information (env: `CT_TENANT_LABEL`) |
| config.tenant.label_remove | bool | `false` | Whether to remove the tenant label from the request (env: `CT_TENANT_LABEL_REMOVE`) |
| config.tenant.prefix | string | `""` | Optional hard-coded prefix with delimiter for all tenant values. Delimiters allowed for use: https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/ (env: `CT_TENANT_PREFIX`) |
| config.timeout | string | `"10s"` | HTTP request timeout (env: `CT_TIMEOUT`) |
| config.timeout_shutdown | string | `"10s"` | Timeout to wait on shutdown to allow load balancers to detect that we're going away. During this period after the shutdown command the /alive endpoint will reply with HTTP 503. Set to 0s to disable. (env: `CT_TIMEOUT_SHUTDOWN`) |
| envs | list | `[]` | Additional environment variables |
| fullnameOverride | string | `nil` | Application fullname override |
| image.pullPolicy | string | `"IfNotPresent"` | Policy when pulling images |
| image.repository | string | `"ghcr.io/blind-oracle/cortex-tenant"` | Repository to pull the image |
| image.tag | string | `""` | Overrides the image tag (default is `.Chart.AppVersion`) |
| nameOverride | string | `nil` | Application name override |
| nodeSelector | object | `{}` | [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) |
| podAnnotations | object | `{}` | Annotations for pods |
| podDisruptionBudget.enabled | bool | `true` | If enabled, PodDisruptionBudget resources are created |
| podDisruptionBudget.minAvailable | int | `1` | Minimum number of pods that must remain scheduled |
| podSecurityContext | object | `{}` | [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context) |
| podTopologySpreadConstraints | list | `[]` | [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/) |
| resources.limits | object | `{"memory":"256Mi"}` | Resources limits |
| resources.requests | object | `{"cpu":"100m","memory":"128Mi"}` | Resources requests |
| securityContext | object | `{}` | [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context) |
| service.port | int | `8080` | The port on which the service listens for traffic |
| service.targetPort | int | `8080` | The target port to which traffic is forwarded |
| service.type | string | `"ClusterIP"` | |
| service.type | string | `"ClusterIP"` | The type of service |
| serviceMonitor.annotations | object | `{}` | ServiceMonitor annotations |
| serviceMonitor.enabled | bool | `false` | If enabled, ServiceMonitor resources for Prometheus Operator are created |
| serviceMonitor.interval | string | `nil` | ServiceMonitor scrape interval |
Expand All @@ -65,5 +66,9 @@ A Helm Chart for cortex-tenant
| serviceMonitor.scheme | string | `"http"` | ServiceMonitor will use http by default, but you can pick https as well |
| serviceMonitor.scrapeTimeout | string | `nil` | ServiceMonitor scrape timeout in Go duration format (e.g. 15s) |
| serviceMonitor.targetLabels | list | `[]` | ServiceMonitor will add labels from the service to the Prometheus metric https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitorspec |
| serviceMonitor.targetPort | int | `9090` | |
| serviceMonitor.tlsConfig | string | `nil` | ServiceMonitor will use these tlsConfig settings to make the health check requests |
| tolerations | list | `[]` | |
| tolerations | list | `[]` | [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) |

----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.2](https://github.com/norwoodj/helm-docs/releases/v1.11.2)
10 changes: 9 additions & 1 deletion deploy/k8s/chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ metadata:
labels:
{{- include "cortex-tenant.labels" . | nindent 4 }}
name: {{ include "cortex-tenant.fullname" . }}
{{- with .Values.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
selector:
matchLabels:
Expand All @@ -12,7 +16,7 @@ spec:
metadata:
annotations:
{{- with .Values.podAnnotations }}
{{- toYaml .Values.podAnnotations | nindent 8 }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "cortex-tenant.selectorLabels" . | nindent 8 }}
Expand Down Expand Up @@ -67,3 +71,7 @@ spec:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.podTopologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 8 }}
{{- end }}
143 changes: 89 additions & 54 deletions deploy/k8s/chart/values.yaml
Original file line number Diff line number Diff line change
@@ -1,128 +1,163 @@
# -- Application name override
nameOverride:
# -- Application fullname override
fullnameOverride:

image:
repository: ghcr.io/blind-oracle/cortex-tenant # registry to pull
pullPolicy: IfNotPresent # policy when pulling images
tag: "" # Overrides the image tag (default is `.Chart.appVersion`)
# -- Repository to pull the image
repository: ghcr.io/blind-oracle/cortex-tenant
# -- Policy when pulling images
pullPolicy: IfNotPresent
# -- Overrides the image tag (default is `.Chart.AppVersion`)
tag: ""

service:
# -- The type of service
type: ClusterIP
# -- The port on which the service listens for traffic
port: 8080
# -- The target port to which traffic is forwarded
targetPort: 8080

autoscaling:
enabled: true # If HorizontalPodAutoscaler must be enabled
minReplica: 1 # Min number of pod replica autoscaled
maxReplica: 3 # Max number of pod replica autoscaled
# -- If enabled, HorizontalPodAutoscaler resources are created
enabled: true
# -- Min number of pod replica autoscaled
minReplica: 1
# -- Max number of pod replica autoscaled
maxReplica: 3
# -- Target memory average value for autoscaling
targetMemoryAverageValue: 100Mi
# -- Target CPU utilization percentage for autoscaling
targetCPUUtilizationPercentage: 50

# -- Additional environment variables
envs:
[]
# - name: CT_LISTEN
# value:

config:
# Where to listen for incoming write requests from Prometheus
# env: CT_LISTEN
# -- Where to listen for incoming write requests from Prometheus
# (env: `CT_LISTEN`)
listen: 0.0.0.0:8080
# Profiling API, leave empty to disable
# env: CT_LISTEN_PPROF
# -- Profiling API, leave empty to disable
# (env: `CT_LISTEN_PPROF`)
listen_pprof: 0.0.0.0:7008
# Where to send the modified requests (Cortex)
# env: CT_TARGET
# -- Where to send the modified requests (Cortex)
# (env: `CT_TARGET`)
target: http://cortex-distributor.cortex.svc:8080/api/v1/push
# Whether to enable querying for IPv6 records
# env: CT_ENABLE_IPV6
# -- Whether to enable querying for IPv6 records
# (env: `CT_ENABLE_IPV6`)
enable_ipv6: false
# Log level
# env: CT_LOG_LEVEL
# -- Log level
# (env: `CT_LOG_LEVEL`)
log_level: warn
# HTTP request timeout
# env: CT_TIMEOUT
# -- HTTP request timeout
# (env: `CT_TIMEOUT`)
timeout: 10s
# Timeout to wait on shutdown to allow load balancers to detect that we're going away.
# -- Timeout to wait on shutdown to allow load balancers to detect that we're going away.
# During this period after the shutdown command the /alive endpoint will reply with HTTP 503.
# Set to 0s to disable.
# env: CT_TIMEOUT_SHUTDOWN
# (env: `CT_TIMEOUT_SHUTDOWN`)
timeout_shutdown: 10s
# Max number of parallel incoming HTTP requests to handle
# env: CT_CONCURRENCY
# -- Max number of parallel incoming HTTP requests to handle
# (env: `CT_CONCURRENCY`)
concurrency: 1000
# Whether to forward metrics metadata from Prometheus to Cortex
# -- Whether to forward metrics metadata from Prometheus to Cortex
# Since metadata requests have no timeseries in them - we cannot divide them into tenants
# So the metadata requests will be sent to the default tenant only, if one is not defined - they will be dropped
# env: CT_METADATA
# (env: `CT_METADATA`)
metadata: false
# If true response codes from metrics backend will be logged to stdout. This setting can be used to suppress errors
# -- If true response codes from metrics backend will be logged to stdout. This setting can be used to suppress errors
# which can be quite verbose like 400 code - out-of-order samples or 429 on hitting ingestion limits
# Also, those are already reported by other services like Cortex/Mimir distributors and ingesters
# env: CT_LOG_RESPONSE_ERRORS
# (env: `CT_LOG_RESPONSE_ERRORS`)
log_response_errors: true
# Maximum duration to keep outgoing connections alive (to Cortex/Mimir)
# -- Maximum duration to keep outgoing connections alive (to Cortex/Mimir)
# Useful for resetting L4 load-balancer state
# Use 0 to keep them indefinitely
# env: CT_MAX_CONN_DURATION
# (env: `CT_MAX_CONN_DURATION`)
max_connection_duration: 0s

# Authentication (optional)
auth:
# Egress HTTP basic auth -> add `Authorization` header to outgoing requests
# -- Egress HTTP basic auth -> add `Authorization` header to outgoing requests
enabled: false
# env: CT_AUTH_EGRESS_USERNAME
# env: CT_AUTH_EGRESS_PASSWORD
# -- Username
# (env: `CT_AUTH_EGRESS_USERNAME`)
username:
# -- Password
# (env: `CT_AUTH_EGRESS_PASSWORD`)
password:
# Secret should pass the CT_AUTH_EGRESS_USERNAME and CT_AUTH_EGRESS_PASSWORD env variables
# -- Secret should pass the `CT_AUTH_EGRESS_USERNAME` and `CT_AUTH_EGRESS_PASSWORD` env variables
existingSecret:

tenant:
# Which label to look for the tenant information
# env: CT_TENANT_LABEL
# -- Which label to look for the tenant information
# (env: `CT_TENANT_LABEL`)
label: tenant
# Optional hard-coded prefix with delimiter for all tenant values.
# -- Optional hard-coded prefix with delimiter for all tenant values.
# Delimiters allowed for use:
# https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/
# env: CT_TENANT_PREFIX
# (env: `CT_TENANT_PREFIX`)
prefix: ""
# Whether to remove the tenant label from the request
# env: CT_TENANT_LABEL_REMOVE
# -- Whether to remove the tenant label from the request
# (env: `CT_TENANT_LABEL_REMOVE`)
label_remove: false
# To which header to add the tenant ID
# env: CT_TENANT_HEADER
# -- To which header to add the tenant ID
# (env: `CT_TENANT_HEADER`)
header: X-Scope-OrgID
# Which tenant ID to use if the label is missing in any of the timeseries
# -- Which tenant ID to use if the label is missing in any of the timeseries
# If this is not set or empty then the write request with missing tenant label
# will be rejected with HTTP code 400
# env: CT_TENANT_DEFAULT
# (env: `CT_TENANT_DEFAULT`)
default: cortex-tenant-default
# Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code
# -- Enable if you want all metrics from Prometheus to be accepted with a 204 HTTP code
# regardless of the response from Cortex. This can lose metrics if Cortex is
# throwing rejections.
# env: CT_TENANT_ACCEPT_ALL
# (env: `CT_TENANT_ACCEPT_ALL`)
accept_all: false

resources: # Resource limits and requests for simu
resources:
# -- Resources limits
limits:
# cpu: 100m
memory: 256Mi
# -- Resources requests
requests:
cpu: 100m
memory: 128Mi

podDisruptionBudget:
enabled: true # If Pod disruption must be enabled
minAvailable: 1 # Number of min pods that must remain available
# -- If enabled, PodDisruptionBudget resources are created
enabled: true
# -- Minimum number of pods that must remain scheduled
minAvailable: 1

podAnnotations: {} # Annotations for pods
# -- Annotations for deployment
annotations: {}

podSecurityContext: {} # [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
# -- Annotations for pods
podAnnotations: {}

securityContext: {} # [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
# -- [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
podSecurityContext: {}

nodeSelector: {} # [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
# -- [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
securityContext: {}

tolerations: [] # [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
# -- [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
nodeSelector: {}

affinity: {} # [Node Selection](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node)
# -- [Taints and Tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
tolerations: []

# -- [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity)
affinity: {}

# -- [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
podTopologySpreadConstraints: []

# ServiceMonitor configuration
serviceMonitor:
Expand Down Expand Up @@ -163,4 +198,4 @@ serviceMonitor:
enabled: false
additionalLabels: {}
# namespace:
rules: []
rules: []

0 comments on commit 10d6c2d

Please sign in to comment.