diff --git a/helm-charts/common/text2image/.helmignore b/helm-charts/common/text2image/.helmignore new file mode 100644 index 00000000..d2c43a2a --- /dev/null +++ b/helm-charts/common/text2image/.helmignore @@ -0,0 +1,25 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +# CI values +ci*-values.yaml diff --git a/helm-charts/common/text2image/Chart.yaml b/helm-charts/common/text2image/Chart.yaml new file mode 100644 index 00000000..56935f24 --- /dev/null +++ b/helm-charts/common/text2image/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +appVersion: "1.1" +description: A Helm chart for deploying opea text2image as microservice +name: text2image +type: application +version: 0-latest diff --git a/helm-charts/common/text2image/README.md b/helm-charts/common/text2image/README.md new file mode 100644 index 00000000..b1708755 --- /dev/null +++ b/helm-charts/common/text2image/README.md @@ -0,0 +1,50 @@ +# OPEA text2image microservice + +Helm chart for deploying OPEA text2image service. 
+ +## Installing the Chart + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME=stable-diffusion-v1-5/stable-diffusion-v1-5 +export HFTOKEN="insert-your-huggingface-token-here" +helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +# To deploy on Gaudi enabled kubernetes cluster +# helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +``` + +By default, the text2image service will download the "stable-diffusion-v1-5/stable-diffusion-v1-5" model, which is about 45GB. + +If you have already cached the model locally, you can pass it to the container as in this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/models--stable-diffusion-v1-5--stable-diffusion-v1-5" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running and in the ready state. + +Then run the command `kubectl port-forward svc/text2image 9379:9379` to expose the text2image service for access. 
+ +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9379/v1/text2image \ + -XPOST \ + -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| MODEL | string | `"stable-diffusion-v1-5/stable-diffusion-v1-5"` | Model ID from https://huggingface.co/, or a pre-downloaded model directory | +| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory; text2image will not download the model if it is already cached here. The host path "modelUseHostPath" will be mounted into the container as the /data directory. Setting this to null/empty forces the model to be downloaded. | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/common/text2image/ci-gaudi-values.yaml b/helm-charts/common/text2image/ci-gaudi-values.yaml new file mode 120000 index 00000000..7243d31b --- /dev/null +++ b/helm-charts/common/text2image/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/common/text2image/ci-values.yaml b/helm-charts/common/text2image/ci-values.yaml new file mode 120000 index 00000000..7d101009 --- /dev/null +++ b/helm-charts/common/text2image/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/common/text2image/gaudi-values.yaml b/helm-charts/common/text2image/gaudi-values.yaml new file mode 100644 index 00000000..9c02c7b5 --- /dev/null +++ b/helm-charts/common/text2image/gaudi-values.yaml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: opea/text2image-gaudi + tag: "latest" + +resources: + limits: + habana.ai/gaudi: 1 + # The following hugepage-related settings are for the default MODEL stable-diffusion-v1-5/stable-diffusion-v1-5. + # Users should change the resource limits for other models. + hugepages-2Mi: 256Mi + +volumes: + - name: hugepage-2mi + emptyDir: + medium: HugePages-2Mi +volumeMounts: + - name: hugepage-2mi + mountPath: /hugepages-2Mi + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" + +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/text2image/templates/_helpers.tpl b/helm-charts/common/text2image/templates/_helpers.tpl new file mode 100644 index 00000000..23993c43 --- /dev/null +++ b/helm-charts/common/text2image/templates/_helpers.tpl @@ -0,0 +1,64 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "text2image.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "text2image.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "text2image.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "text2image.labels" -}} +helm.sh/chart: {{ include "text2image.chart" . }} +{{ include "text2image.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "text2image.selectorLabels" -}} +app.kubernetes.io/name: {{ include "text2image.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "text2image.serviceAccountName" -}} +{{- if .Values.global.sharedSAName }} +{{- .Values.global.sharedSAName }} +{{- else if .Values.serviceAccount.create }} +{{- default (include "text2image.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/text2image/templates/configmap.yaml b/helm-charts/common/text2image/templates/configmap.yaml new file mode 100644 index 00000000..205259ce --- /dev/null +++ b/helm-charts/common/text2image/templates/configmap.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "text2image.fullname" . }}-config + labels: + {{- include "text2image.labels" . | nindent 4 }} +data: + MODEL: {{ .Values.MODEL | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} + HF_HOME: "/tmp/.cache/huggingface" + HF_HUB_CACHE: "/data" + {{- if contains "text2image-gaudi" .Values.image.repository }} + HABANA_LOGS: "/tmp/habana_logs" + {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }} + PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }} + {{- end }} + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}} + {{- end }} + {{- end }} diff --git a/helm-charts/common/text2image/templates/deployment.yaml b/helm-charts/common/text2image/templates/deployment.yaml new file mode 100644 index 00000000..ff04719d --- /dev/null +++ b/helm-charts/common/text2image/templates/deployment.yaml @@ -0,0 +1,164 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "text2image.fullname" . 
}} + labels: + {{- include "text2image.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "text2image.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "text2image.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "text2image.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if not (hasPrefix "/data/" .Values.MODEL) }} + initContainers: + - name: model-downloader + envFrom: + - configMapRef: + name: {{ include "text2image.fullname" . }}-config + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + {{- if hasKey .Values.securityContext "runAsGroup" }} + runAsGroup: {{ .Values.securityContext.runAsGroup }} + {{- end }} + capabilities: + drop: + - ALL + add: + - DAC_OVERRIDE + # To be able to make data model directory group writable for + # previously downloaded model by old versions of helm chart + - FOWNER + seccompProfile: + type: RuntimeDefault + image: huggingface/downloader:0.17.3 + command: ['sh', '-ec'] + args: + - | + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.MODEL }} ... 
"; + huggingface-cli download --cache-dir /data {{ .Values.MODEL | quote }}; + echo "Change model files mode ..."; + chmod -R g+w /data/models--{{ replace "/" "--" .Values.MODEL }} + {{- if contains "text2image-gaudi" .Values.image.repository }} + echo "Download hardcoded model Habana/stable-diffusion ..."; + huggingface-cli download --cache-dir /data "Habana/stable-diffusion"; + chmod -R g+w /data/models--Habana--stable-diffusion + {{- end }} + # NOTE: Buggy logout command; + # huggingface-cli logout; + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + {{- end }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "text2image.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: text2image + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + volumes: + {{- with .Values.volumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 120 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "text2image.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/common/text2image/templates/horizontal-pod-autoscaler.yaml b/helm-charts/common/text2image/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 00000000..4b835576 --- /dev/null +++ b/helm-charts/common/text2image/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "text2image.fullname" . }} + labels: + {{- include "text2image.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "text2image.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/helm-charts/common/text2image/templates/service.yaml b/helm-charts/common/text2image/templates/service.yaml new file mode 100644 index 00000000..a258d040 --- /dev/null +++ b/helm-charts/common/text2image/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "text2image.fullname" . }} + labels: + {{- include "text2image.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: text2image + protocol: TCP + name: text2image + selector: + {{- include "text2image.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/text2image/templates/serviceaccount.yaml b/helm-charts/common/text2image/templates/serviceaccount.yaml new file mode 100644 index 00000000..9ece6bf9 --- /dev/null +++ b/helm-charts/common/text2image/templates/serviceaccount.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "text2image.serviceAccountName" . }} + labels: + {{- include "text2image.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/helm-charts/common/text2image/templates/servicemonitor.yaml b/helm-charts/common/text2image/templates/servicemonitor.yaml new file mode 100644 index 00000000..793be797 --- /dev/null +++ b/helm-charts/common/text2image/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "text2image.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "text2image.selectorLabels" . | nindent 6 }} + endpoints: + - port: text2image + interval: 5s +{{- end }} diff --git a/helm-charts/common/text2image/templates/tests/test-pod.yaml b/helm-charts/common/text2image/templates/tests/test-pod.yaml new file mode 100644 index 00000000..5bd0edcc --- /dev/null +++ b/helm-charts/common/text2image/templates/tests/test-pod.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "text2image.fullname" . }}-testpod" + labels: + {{- include "text2image.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: curl + image: python:3.10 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "text2image.fullname" . }}:{{ .Values.service.port }}/v1/text2image -sS --fail-with-body \ + -X POST \ + -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/common/text2image/values.yaml b/helm-charts/common/text2image/values.yaml new file mode 100644 index 00000000..92e83042 --- /dev/null +++ b/helm-charts/common/text2image/values.yaml @@ -0,0 +1,142 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for text2image. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Configurations for OPEA microservice text2image +# Set to "true" to enable verbose logging +LOGFLAG: "" +# Set to one of the Stable Diffusion models, stable-diffusion-v1-5/stable-diffusion-v1-5, stabilityai/stable-diffusion-2-1, stabilityai/stable-diffusion-xl-base-1.0, stabilityai/stable-diffusion-3-medium-diffusers +MODEL: stable-diffusion-v1-5/stable-diffusion-v1-5 + +accelDevice: "" + +# This will set the replicaset count; more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image; more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: opea/text2image + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +# This is for the secrets for pulling an image from a private repository; more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. 
+nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account; more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# This is for setting Kubernetes Annotations to a Pod. +# For more information check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. +# For more information check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + # Init container sets the downloaded model dir to be group writable, so that container + # can keep its lock file there. This relies on both containers using the same group ID. 
+ runAsGroup: 0 + +# This is for setting up a service; more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type; more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports; more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 9379 + +resources: + # limits: + # cpu: 100m + # memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + +# Set up pod health monitoring probes, see https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +# Use TCP probe instead of HTTP health_check because text2image-gaudi would fail normal requests in HTTP probe +readinessProbe: + tcpSocket: + port: text2image + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + tcpSocket: + port: text2image + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +# This section is for setting up autoscaling; more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +# Note: Do not use (above) "replicaCount" with HPA (Chart ignores value=1 as it's k8s default) +# Note: Because HPA can sometimes change replica counts up and down rather frequently, the microservice needs to handle SIGTERM gracefully: +# - stop accepting new requests +# - handle all of its buffered requests +# - terminate after those have been processed +# See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination. +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 4 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# Additional volumeMounts on the output Deployment definition. 
+volumeMounts: [] + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # service account name to be shared with all parent/child charts. + # If set, it will overwrite serviceAccount.name. + # If set, and serviceAccount.create is false, it will assume this service account is already created by others. + sharedSAName: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test, e.g. /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment, e.g. model-volume + # You can only set one of the following variables; the behavior is not defined if both of them are set. + # By default, both are set to empty; the model will be downloaded every time and will not be saved into any permanent storage medium. + modelUseHostPath: "" + modelUsePVC: "" + + monitoring: false + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack