diff --git a/kubernetes/ansible/roles/sunbird-monitoring/defaults/main.yml b/kubernetes/ansible/roles/sunbird-monitoring/defaults/main.yml index 07d882f7c4..b2544e7739 100644 --- a/kubernetes/ansible/roles/sunbird-monitoring/defaults/main.yml +++ b/kubernetes/ansible/roles/sunbird-monitoring/defaults/main.yml @@ -157,8 +157,12 @@ kafka_graph_events_backup_threshold: 500 kafka_telemetry_raw_backup_threshold: 10000 kafka_telemetry_unique_backup_threshold: 10000 kafka_learning_failed_events_backup: 10000 -kafka_telemetry_denorm_backup: 10000 +kafka_telemetry_denorm_backup_threshold: 10000 kafka_telemetry_pipeline_metrics_backup: 1000 -kafka_telemetry_extractor_failed_backup: 1000 +kafka_telemetry_extractor_failed_backup_threshold: 1000 kafka_telemetry_assess_backup: 1000 +kafka_telemetry_assess_raw_backup_threshold: 1000 +kafka_druid_events_summary_backup_threshold: 10000 +kafka_telemetry_extractor_duplicate_backup_threshold: 1000 +kafka_telemetry_duplicate_backup_threshold: 1000 diff --git a/kubernetes/ansible/roles/sunbird-monitoring/templates/alertrules.yaml b/kubernetes/ansible/roles/sunbird-monitoring/templates/alertrules.yaml index 9c349bfa72..27b076a7e8 100644 --- a/kubernetes/ansible/roles/sunbird-monitoring/templates/alertrules.yaml +++ b/kubernetes/ansible/roles/sunbird-monitoring/templates/alertrules.yaml @@ -13,7 +13,11 @@ kafka_graph_events_backup_threshold: "{{ kafka_graph_events_backup_threshold }}" kafka_telemetry_raw_backup_threshold: "{{ kafka_telemetry_raw_backup_threshold }}" kafka_telemetry_unique_backup_threshold: "{{ kafka_telemetry_unique_backup_threshold }}" kafka_learning_failed_events_backup: "{{ kafka_learning_failed_events_backup }}" -kafka_telemetry_denorm_backup: "{{ kafka_telemetry_denorm_backup }}" +kafka_telemetry_denorm_backup_threshold: "{{ kafka_telemetry_denorm_backup_threshold }}" kafka_telemetry_pipeline_metrics_backup: "{{ kafka_telemetry_pipeline_metrics_backup }}" -kafka_telemetry_extractor_failed_backup: "{{ 
kafka_telemetry_extractor_failed_backup }}" +kafka_telemetry_extractor_failed_backup_threshold: "{{ kafka_telemetry_extractor_failed_backup_threshold }}" kafka_telemetry_assess_backup: "{{ kafka_telemetry_assess_backup }}" +kafka_telemetry_assess_raw_backup_threshold: "{{ kafka_telemetry_assess_raw_backup_threshold }}" +kafka_druid_events_summary_backup_threshold: "{{ kafka_druid_events_summary_backup_threshold }}" +kafka_telemetry_extractor_duplicate_backup_threshold: "{{ kafka_telemetry_extractor_duplicate_backup_threshold }}" +kafka_telemetry_duplicate_backup_threshold: "{{ kafka_telemetry_duplicate_backup_threshold }}" diff --git a/kubernetes/ansible/roles/sunbird-monitoring/templates/kafka-exporter.yaml b/kubernetes/ansible/roles/sunbird-monitoring/templates/kafka-exporter.yaml new file mode 100644 index 0000000000..864be511d7 --- /dev/null +++ b/kubernetes/ansible/roles/sunbird-monitoring/templates/kafka-exporter.yaml @@ -0,0 +1,17 @@ +kafkaExporter: + zookeeper: + servers: ["{{ groups['processing-cluster-zookeepers'] | difference(["localhost"]) | map('regex_replace', '^(.*)$', '\\1:2181') | list | join("\", \"") }}"] + kafka: + servers: ["{{ groups['processing-cluster-kafka'] | difference(["localhost"]) | map('regex_replace', '^(.*)$', '\\1:9092') | list | join("\", \"") }}"] + additionalFlags: + - --use.consumelag.zookeeper + +prometheus: + serviceMonitor: + enabled: true + namespace: monitoring + interval: "120s" + scrapeTimeout: "90s" + additionalLabels: + app: prometheus-operator + release: prometheus-operator diff --git a/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesKafkaLag.yml b/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesKafkaLag.yml index e04bc27add..9ebc7c6c83 100644 --- a/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesKafkaLag.yml +++ b/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesKafkaLag.yml @@ -94,7 +94,7 @@ spec: summary: secor consumer group lag is more for {{ 
.Values.kafka_topic_prefix }}.learning.failed.events.backup - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup consumer group lag - expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup"} > {{ .Values.kafka_telemetry_denorm_backup }} + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup"} > {{ .Values.kafka_telemetry_denorm_backup_threshold }} for: 5m labels: severity: critical @@ -112,7 +112,7 @@ spec: summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.pipeline_metrics - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.extractor.failed consumer group lag - expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.extractor.failed"} > {{ .Values.kafka_telemetry_extractor_failed_backup }} + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.extractor.failed"} > {{ .Values.kafka_telemetry_extractor_failed_backup_threshold }} for: 5m labels: severity: critical diff --git a/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesProcessingKafkaLag.yml b/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesProcessingKafkaLag.yml new file mode 100644 index 0000000000..ea99aeb73b --- /dev/null +++ b/kubernetes/helm_charts/monitoring/alertrules/templates/promrulesProcessingKafkaLag.yml @@ -0,0 +1,103 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + role: alert-rules + app: {{ .Values.prometheus_rule_selector_app }} + release: {{ .Values.prometheus_rule_selector_release }} + name: {{ .Values.fullnameOverride }}-kafkalag-rules + namespace: {{ .Values.namespace }} +spec: + groups: + - name: alertrules.kafkalag + rules: + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.assess.raw group lag + expr: 
kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.assess.raw", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_assess_raw_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.assess.raw consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.assess.raw + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_denorm_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.denorm.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.denorm.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.derived.unique.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.derived.unique.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_derived_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.derived.unique.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.derived.unique.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.druid.events.summary consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix 
}}.druid.events.summary", job="processing-kafka-exporter"} > {{ .Values.kafka_druid_events_summary_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.druid.events.summary consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.druid.events.summary + + - alert: secor {{ .Values.kafka_topic_prefix }}.extractor.duplicate.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.extractor.duplicate.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_extractor_duplicate_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.extractor.duplicate.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.extractor.duplicate.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.extractor.failed.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.extractor.failed.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_extractor_failed_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.extractor.failed.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.extractor.failed.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.failed.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.failed.backup", job="processing-kafka-exporter"} > {{ 
.Values.kafka_telemetry_failed_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.failed.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.failed.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.raw.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.raw.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_raw_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.raw.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.raw.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.duplicate.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.duplicate.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_duplicate_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ .Values.kafka_topic_prefix }}{{`.telemetry.duplicate.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.duplicate.backup + + - alert: secor {{ .Values.kafka_topic_prefix }}.telemetry.unique.backup consumer group lag + expr: kafka_consumergroupzookeeper_lag_zookeeper{consumergroup="{{ .Values.kafka_topic_prefix }}.telemetry.unique.backup", job="processing-kafka-exporter"} > {{ .Values.kafka_telemetry_unique_backup_threshold }} + for: 5m + labels: + severity: critical + annotations: + message: {{`"`}}{{ 
.Values.kafka_topic_prefix }}{{`.telemetry.unique.backup consumer group lag is {{$value}} for partition: {{ $labels.partition }}"`}} + summary: secor consumer group lag is more for {{ .Values.kafka_topic_prefix }}.telemetry.unique.backup diff --git a/kubernetes/helm_charts/monitoring/alertrules/values.yaml b/kubernetes/helm_charts/monitoring/alertrules/values.yaml index a1b0a32076..67d9576a8d 100644 --- a/kubernetes/helm_charts/monitoring/alertrules/values.yaml +++ b/kubernetes/helm_charts/monitoring/alertrules/values.yaml @@ -20,10 +20,14 @@ kafka_graph_events_backup_threshold: 500 kafka_telemetry_raw_backup_threshold: 10000 kafka_telemetry_unique_backup_threshold: 10000 kafka_learning_failed_events_backup: 10000 -kafka_telemetry_denorm_backup: 10000 +kafka_telemetry_denorm_backup_threshold: 10000 kafka_telemetry_pipeline_metrics_backup: 1000 -kafka_telemetry_extractor_failed_backup: 1000 +kafka_telemetry_extractor_failed_backup_threshold: 1000 kafka_telemetry_assess_backup: 1000 +kafka_telemetry_assess_raw_backup_threshold: 1000 +kafka_druid_events_summary_backup_threshold: 10000 +kafka_telemetry_extractor_duplicate_backup_threshold: 1000 +kafka_telemetry_duplicate_backup_threshold: 1000 # Node Exporter vars node_cpu_usage_percentage_threshold_Warning: 75 diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/.helmignore b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/Chart.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/Chart.yaml new file mode 100644 index 0000000000..3a5191414d --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for Kubernetes +name: processing-kafka-exporter +version: 1.0.0 +home: https://github.com/abhishekjiitr/kafka-exporter-helm +maintainers: + - name: abhishekjiitr + email: abhi2254015@gmail.com diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/NOTES.txt b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/NOTES.txt new file mode 100644 index 0000000000..d9eb9e809b --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/NOTES.txt @@ -0,0 +1,5 @@ +1.To see the metrics +{{- if contains "ClusterIP" .Values.service.type }} + kubectl port-forward svc/{{ include "kafka-exporter.fullname" . }} {{ .Values.service.port }} + echo "Visit http://127.0.0.1:{{ .Values.service.port }} to use your application" +{{- end }} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/_helpers.tpl b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/_helpers.tpl new file mode 100644 index 0000000000..bc51bbfcd5 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/_helpers.tpl @@ -0,0 +1,32 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "kafka-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "kafka-exporter.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "kafka-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/alertRules.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/alertRules.yaml new file mode 100644 index 0000000000..0523aaa03e --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/alertRules.yaml @@ -0,0 +1,49 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "kafka-exporter.fullname" . }} + {{- if .Values.prometheus.serviceMonitor.namespace }} + namespace: {{ .Values.prometheus.serviceMonitor.namespace }} + {{- end }} + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + helm.sh/chart: {{ include "kafka-exporter.chart" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- if .Values.prometheus.serviceMonitor.additionalLabels }} +{{ toYaml .Values.prometheus.serviceMonitor.additionalLabels | indent 4 -}} + {{- end }} +spec: + groups: + - name: alertrules.kafkaExporter + rules: + - record: isr_partition_difference + expr: kafka_topic_partition_in_sync_replica{topic!="__consumer_offsets",topic!~"__samza_.*"} - kafka_topic_partition_replicas{topic!="__consumer_offsets",topic!~"__samza_.*"} + + - alert: ISR_not_equal_to_partition + annotations: + message: {{`"ISR and replication difference for {{ $labels.topic }} is > 0 current value: {{ humanize $value }}"`}} + summary: {{`"ISR and Replica Mismatch for {{$labels.topic}}"`}} + expr: isr_partition_difference != 0 + for: 5m + labels: + severity: critical + + - alert: Kafka_partition_leader_change_rapidly + annotations: + message: {{`"Kafka Partition leader changing rapidly for {{ $labels.topic }}"`}} + summary: {{`"Kafka Partition leader changing rapidly for {{ $labels.topic }}"`}} + expr: rate(kafka_topic_partition_leader{topic!="__consumer_offsets",topic!~"__samza_.*"}[5m]) > 0 + for: 5m + labels: + severity: critical + + - alert: kafka_broker_unavailable + annotations: + summary: {{`"Kafka brokers unavailable"`}} + message: "There are only {{`{{humanize $value}}`}} kafka brokers available; Expected count: {{len .Values.kafkaExporter.kafka.servers}}" + expr: kafka_brokers < {{ len .Values.kafkaExporter.kafka.servers }} + for: 5m + labels: + severity: critical diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/deployment.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/deployment.yaml new file mode 100644 index 0000000000..d79b57b019 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/deployment.yaml @@ -0,0 +1,91 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include
"kafka-exporter.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + helm.sh/chart: {{ include "kafka-exporter.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + {{- if .Values.kafkaExporter}} + {{- range .Values.kafkaExporter.kafka.servers }} + - "--kafka.server={{ . }}" + {{- end }} + {{- range .Values.kafkaExporter.zookeeper.servers }} + - "--zookeeper.server={{ . }}" + {{- end }} + {{- range .Values.kafkaExporter.additionalFlags }} + - "{{ . 
}}" + {{- end }} + {{- if .Values.kafkaExporter.kafka.version }} + - --kafka.version={{ .Values.kafkaExporter.kafka.version }} + {{- end }} + {{- end}} + {{- if .Values.kafkaExporter.sasl.enabled }} + - --sasl.enabled + {{- if not .Values.kafkaExporter.sasl.handshake }} + - --sasl.handshake=false + {{- end }} + - --sasl.username={{ .Values.kafkaExporter.sasl.username }} + - --sasl.password={{ .Values.kafkaExporter.sasl.password }} + {{- end }} + {{- if .Values.kafkaExporter.tls.enabled}} + - --tls.enabled + - --tls.ca-file=/etc/tls-certs/ca-file + - --tls.cert-file=/etc/tls-certs/cert-file + - --tls.key-file=/etc/tls-certs/key-file + {{- end }} + {{- if .Values.kafkaExporter.log }} + - --log.level={{ .Values.kafkaExporter.log.level }} + {{- if .Values.kafkaExporter.log.enableSarama }} + - --log.enable-sarama + {{- end }} + {{- end }} + ports: + - name: metrics + containerPort: 9308 + protocol: TCP + {{- if .Values.kafkaExporter.tls.enabled }} + volumeMounts: + - name: tls-certs + mountPath: "/etc/tls-certs/" + readOnly: true + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.kafkaExporter.tls.enabled }} + volumes: + - name: tls-certs + secret: + secretName: {{ include "kafka-exporter.fullname" . 
}} + {{- end }} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/secret.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/secret.yaml new file mode 100644 index 0000000000..82f567f38d --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/secret.yaml @@ -0,0 +1,15 @@ +{{- if .Values.kafkaExporter.tls.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "kafka-exporter.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + helm.sh/chart: {{ include "kafka-exporter.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +data: + ca-file: {{ .Values.kafkaExporter.tls.caFile | b64enc }} + cert-file: {{ .Values.kafkaExporter.tls.certFile | b64enc }} + key-file: {{ .Values.kafkaExporter.tls.keyFile | b64enc }} +{{- end }} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/service.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/service.yaml new file mode 100644 index 0000000000..049041fb03 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "kafka-exporter.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + helm.sh/chart: {{ include "kafka-exporter.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: metrics + protocol: TCP + name: metrics + selector: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/servicemonitor.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/servicemonitor.yaml new file mode 100644 index 0000000000..395d617dd3 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/templates/servicemonitor.yaml @@ -0,0 +1,33 @@ +{{- if .Values.prometheus.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "kafka-exporter.fullname" . }} + {{- if .Values.prometheus.serviceMonitor.namespace }} + namespace: {{ .Values.prometheus.serviceMonitor.namespace }} + {{- end }} + labels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + helm.sh/chart: {{ include "kafka-exporter.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + {{- if .Values.prometheus.serviceMonitor.additionalLabels }} +{{ toYaml .Values.prometheus.serviceMonitor.additionalLabels | indent 4 -}} + {{- end }} +spec: + jobLabel: jobLabel + selector: + matchLabels: + app.kubernetes.io/name: {{ include "kafka-exporter.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + helm.sh/chart: {{ include "kafka-exporter.chart" . 
}} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: metrics + interval: {{ .Values.prometheus.serviceMonitor.interval }} + {{- if .Values.prometheus.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.serviceMonitor.scrapeTimeout }} + {{- end }} +{{- end }} diff --git a/kubernetes/helm_charts/monitoring/processing-kafka-exporter/values.yaml b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/values.yaml new file mode 100644 index 0000000000..4292f73508 --- /dev/null +++ b/kubernetes/helm_charts/monitoring/processing-kafka-exporter/values.yaml @@ -0,0 +1,69 @@ +# Default values for kafka-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: danielqsj/kafka-exporter + tag: latest + pullPolicy: IfNotPresent + +nameOverride: "" +fullnameOverride: "" + +service: + type: ClusterIP + port: 9308 + +kafkaExporter: + kafka: + servers: [] + zookeeper: + servers: [] + additionalFlags: [] + # - --use.consumelag.zookeeper + + sasl: + enabled: false + handshake: true + username: "" + password: "" + + tls: + enabled: false + insecure-skip-tls-verify: false + caFile: "" + certFile: "" + keyFile: "" + + log: + level: info + enableSarama: false + +prometheus: + serviceMonitor: + enabled: true + namespace: monitoring + interval: "30s" + additionalLabels: + app: kafka-exporter + + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {}