diff --git a/.vscode/settings.json b/.vscode/settings.json index 79e2aa83db..32df076eb6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,8 +16,10 @@ "editor.guides.bracketPairs":"active", "cSpell.enabled": false, "yaml.schemas": { - "Kubernetes": "k8s/**/*.yaml", - "recyclarr": "https://raw.githubusercontent.com/recyclarr/recyclarr/master/schemas/config-schema.json" + "https://raw.githubusercontent.com/recyclarr/recyclarr/master/schemas/config-schema.json": "recyclarr.yaml", + "https://raw.githubusercontent.com/recyclarr/recyclarr/master/schemas/settings-schema.json": "settings.yaml", + "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.25.2-standalone/all.json": "k8s/**/*.yaml", + "recyclarr": "https://raw.githubusercontent.com/recyclarr/recyclarr/master/schemas/config-schema.json", }, "material-icon-theme.folders.associations": { ".taskfiles": "utils", @@ -44,12 +46,20 @@ "cluster-betty", "cluster-talos", "cluster-global", + "flux", + "monitoring", + "postgres", "cilium", "kured", "metrics-server", "reloader", "sealed-secrets", - "qbittorrent", + "alertmanager", + "grafana", + "kps", + "loki", + "thanos", + "vector", "cert-manager", "external-dns", "ingress-nginx", @@ -58,7 +68,6 @@ "origin-ca-issuer", "rook-ceph", "qbittorrent", - "flux", "bazarr", "overseer", "prowlarr", @@ -71,8 +80,6 @@ "sonarranime", "tautulli", "nzbget", - "static", - "blackbox", "vpn-gateway", "hajimari", "theme-park" diff --git a/k8s/global/flux/repositories/helm/cloudnative-pg-charts.yaml b/k8s/global/flux/repositories/helm/cloudnative-pg-charts.yaml new file mode 100644 index 0000000000..6cd8394ff2 --- /dev/null +++ b/k8s/global/flux/repositories/helm/cloudnative-pg-charts.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: cloudnative-pg + namespace: flux-system +spec: + interval: 1h + url: https://cloudnative-pg.github.io/charts diff --git 
a/k8s/global/flux/repositories/helm/grafana-charts.yaml b/k8s/global/flux/repositories/helm/grafana-charts.yaml new file mode 100644 index 0000000000..7b1df329e9 --- /dev/null +++ b/k8s/global/flux/repositories/helm/grafana-charts.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: grafana + namespace: flux-system +spec: + interval: 30m + url: https://grafana.github.io/helm-charts + timeout: 3m diff --git a/k8s/global/flux/repositories/helm/kustomization.yaml b/k8s/global/flux/repositories/helm/kustomization.yaml index d8087279e5..1973587cae 100644 --- a/k8s/global/flux/repositories/helm/kustomization.yaml +++ b/k8s/global/flux/repositories/helm/kustomization.yaml @@ -2,16 +2,19 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - bitnami-charts.yaml - cilium-charts.yaml + - cloudnative-pg-charts.yaml - external-dns-charts.yaml + - grafana-charts.yaml - ingress-nginx-charts.yaml - jetstack-charts.yaml - k8s-gateway-charts.yaml + - kubereboot-charts.yaml - metallb-charts.yaml - metrics-server-charts.yaml - prometheus-community-charts.yaml - rook-ceph-charts.yaml - - stakater-charts.yaml - sealed-secrets-charts.yaml - - bitnami-charts.yaml - - kubereboot-charts.yaml + - stakater-charts.yaml + - vector-charts.yaml diff --git a/k8s/global/flux/repositories/helm/vector-charts.yaml b/k8s/global/flux/repositories/helm/vector-charts.yaml new file mode 100644 index 0000000000..e5090d6b23 --- /dev/null +++ b/k8s/global/flux/repositories/helm/vector-charts.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: vector + namespace: flux-system +spec: + interval: 1h + url: https://helm.vector.dev diff --git a/k8s/namespaces/clusters/base/databases/kustomization.yaml b/k8s/namespaces/clusters/base/databases/kustomization.yaml new file mode 100644 index 0000000000..809cbe53b4 --- /dev/null +++ 
b/k8s/namespaces/clusters/base/databases/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml diff --git a/k8s/namespaces/clusters/base/databases/namespace.yaml b/k8s/namespaces/clusters/base/databases/namespace.yaml new file mode 100644 index 0000000000..139865c6c8 --- /dev/null +++ b/k8s/namespaces/clusters/base/databases/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: databases + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/k8s/namespaces/clusters/base/databases/postgres/cluster.yaml b/k8s/namespaces/clusters/base/databases/postgres/cluster.yaml new file mode 100644 index 0000000000..17e4e87f0f --- /dev/null +++ b/k8s/namespaces/clusters/base/databases/postgres/cluster.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: postgres + namespace: databases +spec: + instances: 3 + primaryUpdateStrategy: unsupervised + storage: + size: 10Gi + storageClass: ceph-block + superuserSecret: + name: postgres-superuser + monitoring: + enablePodMonitor: true diff --git a/k8s/namespaces/clusters/base/databases/postgres/helm-release.yaml b/k8s/namespaces/clusters/base/databases/postgres/helm-release.yaml new file mode 100644 index 0000000000..9d5690b67a --- /dev/null +++ b/k8s/namespaces/clusters/base/databases/postgres/helm-release.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: postgres + namespace: databases +spec: + interval: 15m + chart: + spec: + chart: cloudnative-pg + version: 0.15.0 + sourceRef: + kind: HelmRepository + name: cloudnative-pg + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + values: + crds: + create: false diff --git 
a/k8s/namespaces/clusters/base/databases/postgres/kustomization.yaml b/k8s/namespaces/clusters/base/databases/postgres/kustomization.yaml new file mode 100644 index 0000000000..afc49a4461 --- /dev/null +++ b/k8s/namespaces/clusters/base/databases/postgres/kustomization.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - secret.sops.yaml + - helm-release.yaml + - cluster.yaml +configMapGenerator: + - name: cloudnative-pg-dashboard + files: + - cloudnative-pg-dashboard.json=https://raw.githubusercontent.com/cloudnative-pg/charts/main/charts/cnpg-sandbox/dashboard.json +generatorOptions: + disableNameSuffixHash: true + annotations: + kustomize.toolkit.fluxcd.io/substitute: disabled + labels: + grafana_dashboard: "true" diff --git a/k8s/namespaces/clusters/base/databases/postgres/secret.sops.yaml b/k8s/namespaces/clusters/base/databases/postgres/secret.sops.yaml new file mode 100644 index 0000000000..3f9c185ba0 --- /dev/null +++ b/k8s/namespaces/clusters/base/databases/postgres/secret.sops.yaml @@ -0,0 +1,58 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: postgres-superuser + namespace: databases +stringData: + username: ENC[AES256_GCM,data:FD8BSiM5fBts4Aw=,iv:Kq6646pYuvWxOv3PWATY3NMnkZhIA9/rMJugal/nDjE=,tag:XxmFZrnvQ/RUyWXQ/C2JQQ==,type:str] + password: ENC[AES256_GCM,data:UuNEPCydjrIMBe0=,iv:FQ/CxQq9Mt0bXkeGsB/7vxsrZ+olSFy/Rlt1T/l1kYE=,tag:+/ebf9ZjQ/npUsRjzE5h6g==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA0TENNN3V6MnVxSWlBRmVz + ekNaRjZ3MlJXVk1oZ2pZWExYYVlLM0oveDJnCklpYnVTWUQwZHJHNDR2S3U3MEQ0 + WWg4SGZWK2IrQ1FyeFk5Qmd4clc3L00KLS0tIGlvWTRLMjVjVTFlTlREVm5SRUJG + Z1R3RFV5bER5Q0VBb3RnK0diNXpNcVkK3U21Y5GWvnmPA8hxi8Us7TkNGsCYAvlD + QmY8mT6ApdiczqVo1DgFmKDSMIYNGL2wlyyriu9MLCU8a9tGxmj47g== + 
-----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-04T21:03:20Z" + mac: ENC[AES256_GCM,data:iAgS0g0u73yTGjLl4OyQsuoqYYe5CE9Ad1QQ0+HYRiBS7+GEy2A5DhqwlYJuJlVTz1fdAmWq/O/3pJkNyPqOU3o/tIAuyQTwpw4tLqqJ0Dut3LnyjzzMV7lE5cMODmNfx+WhpcjKRAC2iCT50bDskEc6k4FGexfi0fs0wkUPbEw=,iv:zOLY8Xi0f+PY+6EX8vM8gc/HxOw5cRzdSa2OAzqolsM=,tag:oxeflPdBwjfmmM7viFO1FQ==,type:str] + pgp: [] + encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 +--- +apiVersion: v1 +kind: Secret +metadata: + name: grafana + namespace: databases +stringData: + GF_DATABASE_USER: ENC[AES256_GCM,data:41KaDmtK5BkuTh0=,iv:dTL9A5UvgvTYvUSmNpBiuPnKRIumFz0uRYCMWRBcdRM=,tag:ZVjBTuvRzQMkEXjhYOfsTw==,type:str] + GF_DATABASE_PASSWORD: ENC[AES256_GCM,data:1lVcej/PghZeSKg=,iv:Sbu7yX8y/pSHucdUetVeiIohcCyrkBox4EPO37GJfBo=,tag:eNixQE4plTppva+hoZJXeQ==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA0TENNN3V6MnVxSWlBRmVz + ekNaRjZ3MlJXVk1oZ2pZWExYYVlLM0oveDJnCklpYnVTWUQwZHJHNDR2S3U3MEQ0 + WWg4SGZWK2IrQ1FyeFk5Qmd4clc3L00KLS0tIGlvWTRLMjVjVTFlTlREVm5SRUJG + Z1R3RFV5bER5Q0VBb3RnK0diNXpNcVkK3U21Y5GWvnmPA8hxi8Us7TkNGsCYAvlD + QmY8mT6ApdiczqVo1DgFmKDSMIYNGL2wlyyriu9MLCU8a9tGxmj47g== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-04T21:03:20Z" + mac: ENC[AES256_GCM,data:iAgS0g0u73yTGjLl4OyQsuoqYYe5CE9Ad1QQ0+HYRiBS7+GEy2A5DhqwlYJuJlVTz1fdAmWq/O/3pJkNyPqOU3o/tIAuyQTwpw4tLqqJ0Dut3LnyjzzMV7lE5cMODmNfx+WhpcjKRAC2iCT50bDskEc6k4FGexfi0fs0wkUPbEw=,iv:zOLY8Xi0f+PY+6EX8vM8gc/HxOw5cRzdSa2OAzqolsM=,tag:oxeflPdBwjfmmM7viFO1FQ==,type:str] + pgp: [] + encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 diff --git a/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/helm-release.yaml 
b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/helm-release.yaml new file mode 100644 index 0000000000..7ccc93cfe2 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/helm-release.yaml @@ -0,0 +1,49 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: &app alertmanager-discord + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 0.2.2 + sourceRef: + kind: HelmRepository + name: bjw-s-charts + namespace: flux-system + interval: 15m + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + values: + global: + nameOverride: *app + image: + repository: benjojo/alertmanager-discord + tag: latest + service: + main: + ports: + http: + port: 9094 + resources: + requests: + cpu: 20m + memory: 100Mi + envFrom: + - secretRef: + name: alertmanager-discord-secret diff --git a/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/kustomization.yaml new file mode 100644 index 0000000000..73941365cd --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helm-release.yaml + - secret.sops.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/secret.sops.yaml b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/secret.sops.yaml new file mode 100644 index 0000000000..ffdebf55e1 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/alertmanager-discord/secret.sops.yaml @@ -0,0 +1,31 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: alertmanager-discord-secret + namespace: monitoring + labels: + 
app.kubernetes.io/name: alertmanager-discord +type: Opaque +stringData: + DISCORD_WEBHOOK: ENC[AES256_GCM,data:X4wSVMjNcBzHamUy4gDDwdBlwHf+9zBm0u8TBryEHIU39w7k9PW8kDlQC6Cn5EInm85zkDydP5PNTp8z6f1ypJhAwEKcGu7daODXXpHi0UoWqYcVIkuxoxUgajAO8nNxERQ+iUavLduH1QHDJ67fgyuy6iJyUteTaQ==,iv:u89wssUCEltfsSDRIQa64dtphs16NO19ZZOvCQozhA4=,tag:oimjzmsZWO2ukAsVxXPRzA==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBPMFI2WVlGOWhhdHdaNlkr + MFVPczFFcmJ6TDBtNDNiWFQxcGpEY3o4Z2owCnlRcW1qZUVQb2pzWEx2b3dpZDd4 + ZUVpb1orWXQ0ZkMvV0wzS0F2djRSZDQKLS0tIHA2cEl0Q05qbU51RXh4SWprajhr + Z0hDTEgyeE94azhVSkVjb3NxS29Zc28K1tSWQXfEdMZX/HcitWShuTyaRD26VeHL + N4+LidD6V69SHblAiCIf2rbWWgemobiwbuIgGxE1VqLi3KWdKCEwQQ== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-06T19:20:52Z" + mac: ENC[AES256_GCM,data:09yhy8Yl95MPVSBVdh5MWohoOumOU5UV+DktKxqQG1VM1bwAd1gF70/Yh3ok2vcHKREb7sIzwzlMI0n+y1pI2bRXLJT7M0ru7LsJDJ+xs6vj8OXL2S88TE+FRUDlxali4XBLkY94DKmBrfIK3Uqc7rClOOHMv81eeodplQmi7Zw=,iv:Pv3Og4etVefQzxCCXn+QjWCBYxXOy9OHMKPPEL5Jtq8=,tag:1ypZQi7qYJN0AlSiGtOKKw==,type:str] + pgp: [] + encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 diff --git a/k8s/namespaces/clusters/base/monitoring/grafana/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/grafana/helm-release.yaml new file mode 100644 index 0000000000..928537a772 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/grafana/helm-release.yaml @@ -0,0 +1,306 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: &app grafana + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: grafana + version: 6.40.1 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + 
remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + values: + replicas: 3 + deploymentStrategy: + type: Recreate + + admin: + existingSecret: grafana-admin + + envFromSecrets: + - name: grafana + + grafana.ini: + auth.basic: + enabled: true + disable_login_form: false + + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: "default" + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default + - name: "flux" + orgId: 1 + folder: "flux" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/flux + - name: "loki" + orgId: 1 + folder: "loki" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/loki + - name: "media" + orgId: 1 + folder: "media" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/media + - name: "networking" + orgId: 1 + folder: "networking" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/networking + - name: "storage" + orgId: 1 + folder: "storage" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/storage + - name: "thanos" + orgId: 1 + folder: "thanos" + type: file + updateIntervalSeconds: 10 + disableDeletion: false + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/thanos + + datasources: + datasources.yaml: + apiVersion: 1 + deleteDatasources: + - name: Loki + orgId: 1 + datasources: + - name: Prometheus + type: prometheus + url: http://thanos-query.monitoring.svc.cluster.local:9090/ + access: proxy + isDefault: true 
+ - name: Loki + type: loki + access: proxy + url: http://loki.monitoring.svc.cluster.local:3100 + + dashboards: + default: + node-exporter-full: + url: https://grafana.com/api/dashboards/1860/revisions/22/download + datasource: Prometheus + + flux: + flux-cluster: + url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/cluster.json + datasource: Prometheus + flux-control-plane: + url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/control-plane.json + datasource: Prometheus + flux-logs: + url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/logs.json + datasource: Loki + + storage: + # Ref: https://grafana.com/grafana/dashboards/2842 + ceph-cluster: + gnetId: 2842 + revision: 14 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/5336 + ceph-osd: + gnetId: 5336 + revision: 5 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/5342 + ceph-pools: + gnetId: 5342 + revision: 5 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/7845 + zfs: + gnetId: 7845 + revision: 4 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/7107 + netdata: + gnetId: 7107 + revision: 1 + datasource: Prometheus + + media: + radarr: + url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/radarr.json + datasource: Prometheus + sonarr: + url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/sonarr.json + datasource: Prometheus + + networking: + blackbox: + url: https://raw.githubusercontent.com/jr0dd/grafana-dashboards/main/blackbox.json + datasource: Prometheus + cert-manager: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/15513 + cilium-agent: + gnetId: 15513 +
revision: 1 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/15514 + cilium-operator: + gnetId: 15514 + revision: 1 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/15515 + cilium-hubble: + gnetId: 15515 + revision: 1 + datasource: Prometheus + # Ref: https://grafana.com/grafana/dashboards/13665 + speedtest: + gnetId: 13665 + revision: 4 + datasource: Prometheus + nginx-dashboard: + url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/nginx.json + datasource: Prometheus + + thanos: + bucket-replicate: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/bucket-replicate.json + datasource: Prometheus + compact: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/compact.json + datasource: Prometheus + overview: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/overview.json + datasource: Prometheus + query: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/query.json + datasource: Prometheus + query-frontend: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/query-frontend.json + datasource: Prometheus + receive: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/receive.json + datasource: Prometheus + rule: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/rule.json + datasource: Prometheus + sidecar: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/sidecar.json + datasource: Prometheus + store: + url: https://raw.githubusercontent.com/thanos-io/thanos/main/examples/dashboards/store.json + datasource: Prometheus + + sidecar: + dashboards: + enabled: true + searchNamespace: ALL + datasources: + enabled: true + searchNamespace: ALL + + imageRenderer: + enabled: true + + plugins: + - natel-discrete-panel + - 
pr0ps-trackmap-panel + - vonage-status-panel + - grafana-piechart-panel + - grafana-polystat-panel + - grafana-worldmap-panel + - grafana-clock-panel + - grafana-singlestat-panel + - mxswat-separator-panel + - farski-blendstat-panel + - speakyourcode-button-panel + - snuids-trafficlights-panel + + serviceMonitor: + enabled: true + + ingress: + enabled: true + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + ingressClassName: nginx + hosts: + - "grafana.${SECRET_DOMAIN}" + tls: + - secretName: grafana-tls + hosts: + - "grafana.${SECRET_DOMAIN}" + + persistence: + enabled: false + + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: ["grafana"] + topologyKey: kubernetes.io/hostname + + resources: + requests: + cpu: 23m + memory: 110M + limits: + memory: 152M + + podAnnotations: + configmap.reloader.stakater.com/reload: "grafana" diff --git a/k8s/namespaces/clusters/base/monitoring/grafana/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/grafana/kustomization.yaml new file mode 100644 index 0000000000..966cf96e8f --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/grafana/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - config-pvc.yaml + - helm-release.yaml + - secret.sops.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/grafana/patches/env.yaml b/k8s/namespaces/clusters/base/monitoring/grafana/patches/env.yaml new file mode 100644 index 0000000000..77942c380d --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/grafana/patches/env.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: grafana + namespace: monitoring +spec: + values: + env: + GF_ANALYTICS_CHECK_FOR_UPDATES: false + GF_DATABASE_HOST:
postgres-rw.databases.svc.cluster.local:5432 + GF_DATABASE_NAME: grafana + GF_DATABASE_SSL_MODE: disable + GF_DATABASE_TYPE: postgres + GF_DATE_FORMATS_FULL_DATE: "MMM Do, YYYY hh:mm:ss a" + GF_EXPLORE_ENABLED: true + GF_GRAFANA_NET_URL: https://grafana.net + GF_LOG_FILTERS: rendering:debug + GF_LOG_MODE: console + GF_PANELS_DISABLE_SANITIZE_HTML: true + GF_PATHS_DATA: /var/lib/grafana/data + GF_PATHS_LOGS: /var/log/grafana + GF_PATHS_PLUGINS: /var/lib/grafana/plugins + GF_PATHS_PROVISIONING: /etc/grafana/provisioning + GF_SECURITY_ALLOW_EMBEDDING: true + GF_SECURITY_COOKIE_SAMESITE: lax + GF_SERVER_ROOT_URL: "https://grafana.${SECRET_DOMAIN}" diff --git a/k8s/namespaces/clusters/base/monitoring/grafana/patches/postgres.yaml b/k8s/namespaces/clusters/base/monitoring/grafana/patches/postgres.yaml new file mode 100644 index 0000000000..ad3cd31ce5 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/grafana/patches/postgres.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: grafana + namespace: monitoring +spec: + values: + extraInitContainers: + - name: init-db + image: ghcr.io/onedr0p/postgres-initdb:14.5 + env: + - name: POSTGRES_HOST + value: postgres-rw.databases.svc.cluster.local + - name: POSTGRES_DB + value: grafana + - name: POSTGRES_SUPER_PASS + valueFrom: + secretKeyRef: + name: postgres-superuser + key: password + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: grafana + key: GF_DATABASE_USER + - name: POSTGRES_PASS + valueFrom: + secretKeyRef: + name: grafana + key: GF_DATABASE_PASSWORD diff --git a/k8s/namespaces/clusters/base/monitoring/grafana/secret.sops.yaml b/k8s/namespaces/clusters/base/monitoring/grafana/secret.sops.yaml new file mode 100644 index 0000000000..91361a5a86 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/grafana/secret.sops.yaml @@ -0,0 +1,58 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin + namespace:
monitoring +stringData: + admin-user: ENC[AES256_GCM,data:kuidUh1LPnS28yk=,iv:7e25n5CWeLyIhsAG5xMBOIbJQsc9VFz65Ro1AD+ZDcA=,tag:33OHQ7Q5MMr0MClo8DGP3w==,type:str] + admin-password: ENC[AES256_GCM,data:OUc2cGtG9AM++DY=,iv:KJTepyEQsxZVtG6AbyQch/2hXzZVOEGPsCWCXX1oRCg=,tag:EYgaf07OKJUwI+WmMG9rhQ==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA0TENNN3V6MnVxSWlBRmVz + ekNaRjZ3MlJXVk1oZ2pZWExYYVlLM0oveDJnCklpYnVTWUQwZHJHNDR2S3U3MEQ0 + WWg4SGZWK2IrQ1FyeFk5Qmd4clc3L00KLS0tIGlvWTRLMjVjVTFlTlREVm5SRUJG + Z1R3RFV5bER5Q0VBb3RnK0diNXpNcVkK3U21Y5GWvnmPA8hxi8Us7TkNGsCYAvlD + QmY8mT6ApdiczqVo1DgFmKDSMIYNGL2wlyyriu9MLCU8a9tGxmj47g== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-06T19:15:56Z" + mac: ENC[AES256_GCM,data:7gN0iYVMIeAlSF9c21RI5hEdXdQoCUYf3M487gVkMNWgCY256Cf0gmP5CyiusgHjQ/5IIJmRupNZvLX2/BDtSl4FPhJcIcTVWk/zspTaDfvzA5O7uwH+LTKeUYbCIvCqRsRbeNmFTSjTSiItCoc5VPo+oKjK4hZ5SML4D6YABBc=,iv:G55upHE7qQNG+/jYqp4ACyqWnT44/L90BCLF3OQ8UxU=,tag:no+fcHquWpmajVRkqzq7EQ==,type:str] + pgp: [] + encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 +--- +apiVersion: v1 +kind: Secret +metadata: + name: grafana + namespace: monitoring +stringData: + GF_DATABASE_USER: ENC[AES256_GCM,data:66EKHH1SNTbhQOA=,iv:ISp1bxw+62g/+kg6iYOM2NrU5youD7WYuJ/WE3l3Feg=,tag:N2yqF47NKOKtWxj21fODqg==,type:str] + GF_DATABASE_PASSWORD: ENC[AES256_GCM,data:/1QtvaJrJywR0g4=,iv:UKwhiLq05kATf6QJgapk4NrJ/WGRAFj9FpERTfe1Cdk=,tag:COKoQ86J71gladhxZEChlA==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA0TENNN3V6MnVxSWlBRmVz + 
ekNaRjZ3MlJXVk1oZ2pZWExYYVlLM0oveDJnCklpYnVTWUQwZHJHNDR2S3U3MEQ0 + WWg4SGZWK2IrQ1FyeFk5Qmd4clc3L00KLS0tIGlvWTRLMjVjVTFlTlREVm5SRUJG + Z1R3RFV5bER5Q0VBb3RnK0diNXpNcVkK3U21Y5GWvnmPA8hxi8Us7TkNGsCYAvlD + QmY8mT6ApdiczqVo1DgFmKDSMIYNGL2wlyyriu9MLCU8a9tGxmj47g== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-06T19:15:56Z" + mac: ENC[AES256_GCM,data:7gN0iYVMIeAlSF9c21RI5hEdXdQoCUYf3M487gVkMNWgCY256Cf0gmP5CyiusgHjQ/5IIJmRupNZvLX2/BDtSl4FPhJcIcTVWk/zspTaDfvzA5O7uwH+LTKeUYbCIvCqRsRbeNmFTSjTSiItCoc5VPo+oKjK4hZ5SML4D6YABBc=,iv:G55upHE7qQNG+/jYqp4ACyqWnT44/L90BCLF3OQ8UxU=,tag:no+fcHquWpmajVRkqzq7EQ==,type:str] + pgp: [] + encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 diff --git a/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/helm-release.yaml new file mode 100644 index 0000000000..32a7e1c960 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/helm-release.yaml @@ -0,0 +1,279 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: &app kube-prometheus-stack + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: kube-prometheus-stack + version: 40.3.1 + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + values: + alertmanager: + config: + global: + resolve_timeout: 5m + receivers: + - name: "null" + - name: discord + webhook_configs: + - url: http://alertmanager-discord:9094 + route: + group_by: ["alertname", "job"] + group_wait: 30s + group_interval: 5m + repeat_interval: 6h + receiver: "discord" + routes: + - receiver: "null" + matchers: + - alertname =~ "InfoInhibitor|Watchdog" + - 
receiver: "discord" + matchers: + - severity = "critical" + continue: true + inhibit_rules: + - source_matchers: + - severity = "critical" + target_matchers: + - severity = "warning" + equal: ["alertname", "namespace"] + ingress: + enabled: true + pathType: Prefix + ingressClassName: "nginx" + annotations: + # cert-manager.io/cluster-issuer: ${CLUSTER_CERT} + cert-manager.io/cluster-issuer: "letsencrypt-staging" + hosts: + - &host "alert-manager.${SECRET_PUBLIC_DOMAIN}" + tls: + - secretName: "alert-manager-tls" + hosts: + - *host + alertmanagerSpec: + replicas: 3 + podAntiAffinity: hard + storage: + volumeClaimTemplate: + spec: + storageClassName: "ceph-block" + resources: + requests: + storage: 1Gi + + alertmanagerSpec: + replicas: 1 + + kubeApiServer: + enabled: true + + kubeControllerManager: + enabled: true + endpoints: + - 192.168.20.5 + - 192.168.20.6 + - 192.168.20.7 + + kubeScheduler: + enabled: true + endpoints: + - 192.168.20.5 + - 192.168.20.6 + - 192.168.20.7 + + kubeEtcd: + enabled: true + endpoints: + - 192.168.20.5 + - 192.168.20.6 + - 192.168.20.7 + service: + enabled: true + port: 2381 + targetPort: 2381 + + kubelet: + enabled: true + serviceMonitor: + metricRelabelings: + - action: replace + sourceLabels: + - node + targetLabel: instance + + kubeProxy: + enabled: false + + kubeStateMetrics: + enabled: true + + kube-state-metrics: + metricLabelsAllowlist: + - "persistentvolumeclaims=[*]" + prometheus: + monitor: + enabled: true + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node + resources: + requests: + cpu: 15m + memory: 127M + limits: + memory: 153M + + grafana: + enabled: false + forceDeployDashboards: true + sidecar: + dashboards: + multicluster: + etcd: + enabled: true + + nodeExporter: + enabled: true + + prometheus-node-exporter: + resources: + requests: + cpu: 23m + memory: 64M + limits: + memory: 64M + + prometheus: + monitor: + enabled: 
true + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node + + prometheusOperator: + resources: + requests: + cpu: 35m + memory: 273M + limits: + memory: 326M + + prometheusConfigReloader: + resources: + requests: + cpu: 11m + memory: 32M + limits: + memory: 32M + + prometheus: + ingress: + enabled: true + pathType: Prefix + ingressClassName: "nginx" + annotations: + # cert-manager.io/cluster-issuer: ${CLUSTER_CERT} + cert-manager.io/cluster-issuer: "letsencrypt-staging" + hosts: + - &host "prometheus.${SECRET_DOMAIN}" + tls: + - secretName: "prometheus-tls" + hosts: + - *host + + thanosService: + enabled: true + + thanosServiceMonitor: + enabled: true + + thanosIngress: + enabled: true + pathType: Prefix + ingressClassName: "nginx" + annotations: + # cert-manager.io/cluster-issuer: ${CLUSTER_CERT} + cert-manager.io/cluster-issuer: "letsencrypt-staging" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/backend-protocol: "GRPC" + hosts: + - &host "thanos-sidecar.${SECRET_DOMAIN}" + tls: + - secretName: "thanos-sidecar-tls" + hosts: + - *host + + prometheusSpec: + replicas: 3 + replicaExternalLabelName: __replica__ + externalLabels: + cluster: betty + podAntiAffinity: hard + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + retention: 14d + retentionSize: 45GB + enableAdminAPI: true + walCompression: true + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: "ceph-block" + resources: + requests: + storage: 50Gi + resources: + requests: + cpu: 10m + memory: 500Mi + limits: + memory: 1000Mi + + thanos: + image: quay.io/thanos/thanos:v0.28.0 + version: v0.28.0 + + additionalScrapeConfigs: + - job_name: node-exporter + scrape_interval: 1m + scrape_timeout: 10s + honor_timestamps: true + static_configs: + - 
targets: + - "kmaster1.${SECRET_DOMAIN}:9100" + - "kmaster2.${SECRET_DOMAIN}:9100" + - "kmaster3.${SECRET_DOMAIN}:9100" + - "kworker1.${SECRET_DOMAIN}:9100" + - "kworker2.${SECRET_DOMAIN}:9100" + - "kworker3.${SECRET_DOMAIN}:9100" diff --git a/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/kustomization.yaml new file mode 100644 index 0000000000..2fa2de20ca --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helm-release.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/kustomization.yaml new file mode 100644 index 0000000000..809cbe53b4 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/loki/config-map.yaml b/k8s/namespaces/clusters/base/monitoring/loki/config-map.yaml new file mode 100644 index 0000000000..4dfcc1f5c2 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/loki/config-map.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-alerting-rules + namespace: monitoring +data: + loki-alerting-rules.yaml: |- + groups: + # + # SMART Failures + # + - name: smart-failure + rules: + - alert: SmartFailures + expr: | + sum by (hostname) (count_over_time({hostname=~".+"} | json | _SYSTEMD_UNIT = "smartmontools.service" !~ "(?i)previous self-test completed without error" !~ "(?i)Prefailure" |~ "(?i)(error|fail)"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "SMART has reported failures on host {{ $labels.hostname }}" + # + # *arr + # + - name: arr + rules: + - alert: 
ArrDatabaseIsLocked + expr: | + sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database is locked"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "{{ $labels.app }} is experiencing locked database issues" + - alert: ArrDatabaseIsMalformed + expr: | + sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database disk image is malformed"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "{{ $labels.app }} is experiencing malformed database disk image issues" diff --git a/k8s/namespaces/clusters/base/monitoring/loki/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/loki/helm-release.yaml new file mode 100644 index 0000000000..eb37297074 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/loki/helm-release.yaml @@ -0,0 +1,148 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: &app loki + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: loki + version: 3.2.0 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + values: + loki: + structuredConfig: + auth_enabled: false + server: + log_level: info + http_listen_port: 3100 + grpc_listen_port: 9095 + memberlist: + join_members: ["loki-memberlist"] + limits_config: + retention_period: 14d + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + max_cache_freshness_per_query: 10m + split_queries_by_interval: 15m + ingestion_rate_mb: 8 + ingestion_burst_size_mb: 16 + schema_config: + configs: + - from: "2021-08-01" + store: boltdb-shipper + object_store: s3 + schema: v11 + index: + prefix: loki_index_ + period: 24h + common: + path_prefix: /var/loki + replication_factor: 3 
+ storage: + s3: + s3: null + insecure: true + s3forcepathstyle: true + ring: + kvstore: + store: memberlist + ruler: + enable_api: true + enable_alertmanager_v2: true + alertmanager_url: http://kube-prometheus-stack-alertmanager:9093 + storage: + type: local + local: + directory: /rules + rule_path: /tmp/scratch + ring: + kvstore: + store: memberlist + distributor: + ring: + kvstore: + store: memberlist + compactor: + working_directory: /var/loki/boltdb-shipper-compactor + shared_store: s3 + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 + ingester: + max_chunk_age: 1h + lifecycler: + ring: + kvstore: + store: memberlist + analytics: + reporting_enabled: false + gateway: + enabled: true + replicas: 3 + ingress: + enabled: true + ingressClassName: "nginx" + annotations: + # cert-manager.io/cluster-issuer: ${CLUSTER_CERT} + cert-manager.io/cluster-issuer: "letsencrypt-staging" + hosts: + - host: &host "loki.${SECRET_DOMAIN}" + paths: + - path: / + pathType: Prefix + tls: + - secretName: "loki-tls" + hosts: + - *host + write: + replicas: 3 + persistence: + size: 10Gi + storageClass: ceph-block + read: + replicas: 3 + extraVolumeMounts: + - name: loki-rules + mountPath: /rules/fake + - name: loki-rules-tmp + mountPath: /tmp/scratch + - name: loki-tmp + mountPath: /tmp/loki-tmp + extraVolumes: + - name: loki-rules + configMap: + name: loki-alerting-rules + - name: loki-rules-tmp + emptyDir: {} + - name: loki-tmp + emptyDir: {} + persistence: + size: 10Gi + storageClass: ceph-block + monitoring: + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false diff --git a/k8s/namespaces/clusters/base/monitoring/loki/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/loki/kustomization.yaml new file mode 100644 index 0000000000..c2b538cdda --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/loki/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: 
kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - config-map.yaml + - helm-release.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/namespace.yaml b/k8s/namespaces/clusters/base/monitoring/namespace.yaml new file mode 100644 index 0000000000..ef4dd87a43 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/k8s/namespaces/clusters/base/monitoring/thanos/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/thanos/helm-release.yaml new file mode 100644 index 0000000000..1ac73f3e7e --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/thanos/helm-release.yaml @@ -0,0 +1,69 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: thanos + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: thanos + version: 11.5.4 + sourceRef: + kind: HelmRepository + name: bitnami + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + dependsOn: + - name: kube-prometheus-stack + namespace: monitoring + values: + query: + enabled: true + replicaCount: 3 + podAntiAffinityPreset: hard + replicaLabel: + - __replica__ + dnsDiscovery: + sidecarsService: kube-prometheus-stack-thanos-discovery + sidecarsNamespace: monitoring + ingress: + enabled: true + hostname: &host "thanos-query.${SECRET_DOMAIN}" + ingressClassName: "nginx" + annotations: + # cert-manager.io/cluster-issuer: ${CLUSTER_CERT} + cert-manager.io/cluster-issuer: "letsencrypt-staging" + tls: true + extraTls: + - secretName: "thanos-query-tls" + hosts: + - *host + queryFrontend: + enabled: false + bucketweb: + enabled: false + compactor: + enabled: false + storegateway: + enabled: 
false + ruler: + enabled: false + metrics: + enabled: true + serviceMonitor: + enabled: true diff --git a/k8s/namespaces/clusters/base/monitoring/thanos/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/thanos/kustomization.yaml new file mode 100644 index 0000000000..2fa2de20ca --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/thanos/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helm-release.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/vector/agent/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/vector/agent/helm-release.yaml new file mode 100644 index 0000000000..dbd0a391d8 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/agent/helm-release.yaml @@ -0,0 +1,73 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: vector-agent + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: vector + version: 0.16.0 + sourceRef: + kind: HelmRepository + name: vector + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + dependsOn: + - name: loki + namespace: monitoring + - name: vector-aggregator + namespace: monitoring + values: + image: + repository: timberio/vector + tag: 0.24.1-debian + role: Agent + customConfig: + data_dir: /vector-data-dir + api: + enabled: false + sources: + journal_logs: + type: journald + journal_directory: /var/log/journal + kubernetes_logs: + type: kubernetes_logs + pod_annotation_fields: + container_image: container_image + container_name: container_name + pod_annotations: pod_annotations + pod_labels: pod_labels + pod_name: pod_name + sinks: + loki_journal_sink: + type: vector + inputs: + - journal_logs + address: vector-aggregator:6000 + version: "2" + 
loki_kubernetes_sink: + type: vector + inputs: + - kubernetes_logs + address: vector-aggregator:6010 + version: "2" + service: + enabled: false + securityContext: + privileged: true diff --git a/k8s/namespaces/clusters/base/monitoring/vector/agent/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/vector/agent/kustomization.yaml new file mode 100644 index 0000000000..2fa2de20ca --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/agent/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helm-release.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/vector/aggregator/filterlog-regex.txt b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/filterlog-regex.txt new file mode 100644 index 0000000000..59d572d034 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/filterlog-regex.txt @@ -0,0 +1,20 @@ +# +# IPv4: TCP +# Regex: ^(?P(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?Ptcp),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*))$ +# Example: 94,,,ef794793b2e3764b938bd04cba88e8a3,igb0,match,pass,out,4,0x0,,62,16800,0,DF,6,tcp,60,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,11715,443,0,S,3876953207,,64240,,mss;sackOK;TS;nop;wscale +# +# IPv6: TCP +# Regex: ? +# Example: ? 
+# +# IPv4 / IPv6: UDP +# Regex: ^(?P(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?Pudp),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*))$ +# Example: 90,,,91e2443ae2e8caf012f9a6e5a8a455c8,lo0,match,pass,in,4,0x4,,255,4660,0,none,17,udp,914,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,5353,5353,894 +# Example: 15,,,91515c100a3692cb94121964974ce513,igb1_vlan150,match,block,in,6,0x00,0x00000,255,udp,17,391,xxxx::xxxx:xxxx:xxxx:xxxx,xxxx::xx,5353,5353,391 +# +# IPv4: ICMP / IGMP / GRE +# Regex: ^(?P(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?Picmp|igmp|gre),(?P[^,]*),(?P[^,]*),(?P[^,]*),(?P[^,]*))$ +# Example: 94,,,ef794793b2e3764b938bd04cba88e8a3,igb0,match,pass,out,4,0x0,,63,44871,0,DF,1,icmp,84,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=64 +# Example: 16,,,02f4bab031b57d1e30553ce08e0ec131,igb1_vlan150,match,block,in,4,0xc0,,1,15472,0,none,2,igmp,32,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=8 +# Example: 16,,,02f4bab031b57d1e30553ce08e0ec131,igb0,match,block,in,4,0x0,,57,20354,0,DF,47,gre,564,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=544 +# diff --git a/k8s/namespaces/clusters/base/monitoring/vector/aggregator/helm-release.yaml b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/helm-release.yaml new file mode 100644 index 0000000000..43e9638539 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/helm-release.yaml @@ -0,0 +1,186 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: vector-aggregator + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: vector + version: 0.16.0 + sourceRef: + kind: HelmRepository + name: vector + namespace: flux-system + test: + enable: false + install: + remediation: + retries: 5 + upgrade: + remediation: + 
retries: 5 + remediateLastFailure: true + cleanupOnFail: true + rollback: + timeout: 10m + recreate: true + cleanupOnFail: true + dependsOn: + - name: loki + namespace: monitoring + values: + image: + repository: timberio/vector + tag: 0.24.1-debian + role: Stateless-Aggregator + replicas: 3 + customConfig: + data_dir: /vector-data-dir + api: + enabled: false + sources: + journal_logs: + type: vector + address: 0.0.0.0:6000 + version: "2" + kubernetes_logs: + type: vector + address: 0.0.0.0:6010 + version: "2" + opnsense_filterlog_logs: + type: syslog + address: 0.0.0.0:5140 + mode: udp + transforms: + kubernetes_logs_remap: + type: remap + inputs: + - kubernetes_logs + source: | + # Standardize 'app' index + .custom_app_name = .pod_labels."app.kubernetes.io/name" || .pod_labels.app || .pod_labels."k8s-app" || "unknown" + opnsense_filterlog_remap: + type: remap + inputs: + - opnsense_filterlog_logs + source: | + msg = parse_csv!(string!(.message)) + # Only parse IPv4 / IPv6 + if msg[8] == "4" || msg[8] == "6" { + .filter_interface = msg[4] + .filter_direction = msg[7] + .filter_action = msg[6] + .filter_ip_version = msg[8] + .filter_protocol = msg[16] + .filter_source_ip = msg[18] + .filter_destination_ip = msg[19] + if (msg[16] == "icmp" || msg[16] == "igmp" || msg[16] == "gre") { + .filter_data = msg[20] + } else { + .filter_source_port = msg[20] + .filter_destination_port = msg[21] + .filter_data_length = msg[22] + if msg[8] == "4" && msg[16] == "tcp" { + .filter_tcp_flags = msg[23] + } + } + } + opnsense_filterlog_route: + type: route + inputs: + - opnsense_filterlog_remap + route: + pass_action: >- + .filter_action == "pass" + opnsense_filterlog_geoip: + type: geoip + inputs: + - opnsense_filterlog_route.pass_action + database: /geoip/GeoLite2-City.mmdb + source: filter_source_ip + target: geoip + sinks: + loki_journal: + type: loki + inputs: + - journal_logs + endpoint: http://loki-gateway:80 + encoding: + codec: json + batch: + max_bytes: 2049000 + 
out_of_order_action: accept + remove_label_fields: true + remove_timestamp: true + labels: + hostname: >- + {{`{{ host }}`}} + loki_kubernetes: + type: loki + inputs: + - kubernetes_logs_remap + endpoint: http://loki-gateway:80 + encoding: + codec: json + batch: + max_bytes: 2049000 + out_of_order_action: accept + remove_label_fields: true + remove_timestamp: true + labels: + app: >- + {{`{{ custom_app_name }}`}} + namespace: >- + {{`{{ kubernetes.pod_namespace }}`}} + node: >- + {{`{{ kubernetes.pod_node_name }}`}} + loki_opnsense_filterlog: + type: loki + inputs: + - opnsense_filterlog_route._unmatched + - opnsense_filterlog_geoip + endpoint: http://loki-gateway:80 + encoding: + codec: json + batch: + max_bytes: 2049000 + out_of_order_action: accept + labels: + hostname: opnsense + extraVolumeMounts: + - name: geoip + mountPath: /geoip + extraVolumes: + - name: geoip + persistentVolumeClaim: + claimName: vector-geoipupdate-config-v1 + service: + enabled: true + type: LoadBalancer + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: ["Stateless-Aggregator"] + topologyKey: kubernetes.io/hostname + postRenderers: + - kustomize: + patchesJson6902: + - target: + kind: Service + name: vector-aggregator + patch: + - op: add + path: /spec/externalIPs + value: ["${SVC_SYSLOG_ADDR}"] + - op: replace + path: /spec/externalTrafficPolicy + value: Local diff --git a/k8s/namespaces/clusters/base/monitoring/vector/aggregator/kustomization.yaml b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/kustomization.yaml new file mode 100644 index 0000000000..2fa2de20ca --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/aggregator/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helm-release.yaml diff --git 
a/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/config-pvc.yaml b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/config-pvc.yaml new file mode 100644 index 0000000000..66f4d04c37 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/config-pvc.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: vector-geoipupdate-config-v1 + namespace: monitoring + labels: + excluded_from_alerts: "true" +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 1Gi + storageClassName: ceph-filesystem diff --git a/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/cron-job.yaml b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/cron-job.yaml new file mode 100644 index 0000000000..cc25f3ca0a --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/cron-job.yaml @@ -0,0 +1,54 @@ +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: &app vector-geoipupdate + namespace: monitoring +spec: + schedule: "@daily" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 2 + jobTemplate: + spec: + ttlSecondsAfterFinished: 86400 + template: + spec: + automountServiceAccountToken: false + restartPolicy: Never + containers: + - name: *app + image: docker.io/maxmindinc/geoipupdate:v4.10 + imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - rm -rf /usr/share/GeoIP/.geoipupdate.lock + env: + - name: GEOIPUPDATE_EDITION_IDS + value: GeoLite2-City + - name: GEOIPUPDATE_FREQUENCY + value: "0" + - name: GEOIPUPDATE_VERBOSE + value: "true" + # The SOPS-encrypted Secret stores its keys as GEOUPDATE_*, but geoipupdate only reads GEOIPUPDATE_* variables, so map them explicitly. + - name: GEOIPUPDATE_ACCOUNT_ID + valueFrom: + secretKeyRef: + name: *app + key: GEOUPDATE_ACCOUNT_ID + - name: GEOIPUPDATE_LICENSE_KEY + valueFrom: + secretKeyRef: + name: *app + key: GEOUPDATE_LICENSE_KEY + volumeMounts: + - name: *app + mountPath: /usr/share/GeoIP + volumes: + - name: *app + persistentVolumeClaim: + claimName: vector-geoipupdate-config-v1 diff --git a/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/kustomization.yaml 
b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/kustomization.yaml new file mode 100644 index 0000000000..2fa2de20ca --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - config-pvc.yaml + - cron-job.yaml + - secrets.sops.yaml diff --git a/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/secrets.sops.yaml b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/secrets.sops.yaml new file mode 100644 index 0000000000..658b2fb917 --- /dev/null +++ b/k8s/namespaces/clusters/base/monitoring/vector/geoipupdate/secrets.sops.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Secret +metadata: + name: vector-geoipupdate + namespace: monitoring +type: Opaque +stringData: + GEOUPDATE_ACCOUNT_ID: ENC[AES256_GCM,data:Sjq0DHmf,iv:0MsyPvCY511ZsvPcg99Keles1NIBgjNwlEb3qKpXn44=,tag:FoVYjet3KFaqQOnaUVB59Q==,type:str] + GEOUPDATE_LICENSE_KEY: ENC[AES256_GCM,data:ouZ8q4mLmbFVvXnefC53ug==,iv:s0a6pfHRriE6CV7c8KwYRn8zPsfnl4EhKUl2fiyHfE4=,tag:VRhVwjHvCQepq6QHRdayMQ==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1eynu35v0tpg9remal6zeecfeg9e84a2qxake027wwgdn02rdfcls7nyv8r + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBrRkJ4VTViM0NTS254NU1V + QW95MGJqV1NGNVplRXB1aDJ5WFlaOWNaRGtFCnhUeGR5aCtDZFV2b2c4Q2phbHFJ + dzU2MURjV0NBWkk0a2N6WCt3cTFKRjQKLS0tIDlEQU5KeUNQakNJRkdtK0RWdnc4 + TGxoQ2RBRVRRbW9Ib3lpcUpheDlTTHMKfzDVtap+CWIqOOo7SOe2GvKQPGyazB9/ + AJpzXvIV6uxKjd5KhSxdgtXz4wJPXN93MalwC90gZdDMi3WegZOKLg== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-10-05T21:12:26Z" + mac: ENC[AES256_GCM,data:Q+5BEI/bG2upEUMGlnOMt0C9oDHzZPLuzYyVGRzCSt8zOwj+ZmDYcTdfO88oVuPLZqO+0b89KPSvqrYevSXHChR5APqwGoQBcJWejotAfYfn1bPLkxCSMCv0b7pCFY+IOP3M0ocsCm83DyboIeQc1ftfkZOBuMxNQesp/Sz6x08=,iv:Us8OUzwa90eZ+SpV1U6h9r0ebQyfjlNP+xBU+VI6kF8=,tag:NDfaKBNcoysm5gz2HSh5Pg==,type:str] + pgp: [] + 
encrypted_regex: ((?i)(pass|secret($|[^N])|key|token|^data$|^stringData)) + version: 3.7.3 diff --git a/k8s/namespaces/clusters/base/rook-ceph/rook-cluster/helm-release.yaml b/k8s/namespaces/clusters/base/rook-ceph/rook-cluster/helm-release.yaml index e860e53610..d94cb2bf8e 100644 --- a/k8s/namespaces/clusters/base/rook-ceph/rook-cluster/helm-release.yaml +++ b/k8s/namespaces/clusters/base/rook-ceph/rook-cluster/helm-release.yaml @@ -29,7 +29,8 @@ spec: cleanupOnFail: true values: monitoring: - enabled: false + enabled: true + createPrometheusRules: true ingress: dashboard: ingressClassName: "nginx" diff --git a/k8s/namespaces/clusters/base/rook-ceph/rook-operator/helm-release.yaml b/k8s/namespaces/clusters/base/rook-ceph/rook-operator/helm-release.yaml index d16fe52a7f..2cd42a6042 100644 --- a/k8s/namespaces/clusters/base/rook-ceph/rook-operator/helm-release.yaml +++ b/k8s/namespaces/clusters/base/rook-ceph/rook-operator/helm-release.yaml @@ -30,6 +30,8 @@ spec: values: crds: enabled: false + monitoring: + enabled: true resources: requests: cpu: 10m diff --git a/k8s/namespaces/overlays/base/database.yaml b/k8s/namespaces/overlays/base/database.yaml new file mode 100644 index 0000000000..025319bd93 --- /dev/null +++ b/k8s/namespaces/overlays/base/database.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: database-namespace + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/databases" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: database-postgres + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/databases/postgres" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: 
networking-cert-manager + namespace: flux-system + - name: database-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets diff --git a/k8s/namespaces/overlays/base/monitoring.yaml b/k8s/namespaces/overlays/base/monitoring.yaml new file mode 100644 index 0000000000..f2c6e3b919 --- /dev/null +++ b/k8s/namespaces/overlays/base/monitoring.yaml @@ -0,0 +1,304 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-namespace + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-alertmanager-discord + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/alertmanager-discord" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-loki + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/loki" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-kube-prometheus-stack + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/kube-prometheus-stack" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace 
+ namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-grafana + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/grafana" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-grafana-patches + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/grafana/patches" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + - name: monitoring-grafana + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-thanos + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/thanos" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: 
networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-vector-agent + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/vector/agent" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-vector-aggregator + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/vector/aggregator" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: monitoring-vector-geoipupdate + namespace: flux-system +spec: + interval: 5m + path: "./k8s/namespaces/clusters/base/monitoring/vector/geoipupdate" + prune: true + wait: true + sourceRef: + kind: GitRepository + name: home-cluster + decryption: + provider: sops + 
secretRef: + name: sops-age + dependsOn: + - name: networking-cert-manager + namespace: flux-system + - name: monitoring-namespace + namespace: flux-system + postBuild: + substitute: {} + substituteFrom: + - kind: ConfigMap + name: cluster-config + - kind: ConfigMap + name: global-config + - kind: Secret + name: cluster-secrets