
Add initContainer to download model #613

Merged · 1 commit · Dec 11, 2024
15 changes: 7 additions & 8 deletions helm-charts/common/guardrails-usvc/README.md
@@ -47,11 +47,10 @@ curl http://localhost:9090/v1/guardrails \

## Values

| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token |
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
| image.repository | string | `"opea/guardrails-usvc"` | |
| service.port | string | `"9090"` | |
| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint |
| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using |
| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------ |
| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token |
| image.repository | string | `"opea/guardrails-usvc"` | |
| service.port | string | `"9090"` | |
| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint |
| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID used by the underlying LLM service      |
1 change: 1 addition & 0 deletions helm-charts/common/speecht5/templates/configmap.yaml
@@ -18,3 +18,4 @@ data:
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
HUGGINGFACE_HUB_CACHE: "/data"
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
38 changes: 34 additions & 4 deletions helm-charts/common/speecht5/templates/deployment.yaml
@@ -28,6 +28,40 @@ spec:
serviceAccountName: {{ include "speecht5.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.TTS_MODEL_PATH) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "speecht5.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.TTS_MODEL_PATH | quote }};
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.VOCODER_MODEL | quote }};
chmod -R g+w /data/models--{{ replace "/" "--" .Values.TTS_MODEL_PATH }};
chmod -R g+w /data/models--{{ replace "/" "--" .Values.VOCODER_MODEL }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
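The `hasPrefix "/data/"` guard wrapping this init container skips the download when TTS_MODEL_PATH already points at a pre-downloaded directory inside the mounted volume. A sketch of the equivalent check in plain sh (the path value is illustrative):

```shell
# Mirrors the chart's `{{- if not (hasPrefix "/data/" .Values.TTS_MODEL_PATH) }}`
# guard: a value under /data/ is treated as an already-cached model directory.
needs_download() {
  case "$1" in
    /data/*) return 1 ;;  # pre-downloaded path, skip the downloader
    *)       return 0 ;;  # Hugging Face model ID, download it
  esac
}

needs_download "microsoft/speecht5_tts" && echo "download"
needs_download "/data/models--microsoft--speecht5_tts" || echo "skip"
```

Run as-is, this prints `download` followed by `skip`.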
containers:
- name: {{ .Release.Name }}
envFrom:
@@ -39,11 +73,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
6 changes: 5 additions & 1 deletion helm-charts/common/speecht5/values.yaml
@@ -8,7 +8,7 @@
replicaCount: 1

TTS_MODEL_PATH: "microsoft/speecht5_tts"
# VOCODE_MODEL: "microsoft/speecht5_hifigan"
VOCODER_MODEL: "microsoft/speecht5_hifigan"

image:
repository: opea/speecht5
@@ -47,6 +47,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
Contributor comment on lines +50 to +51: I think (with the last change) the best place for these comments would be with the runAsGroup check in the deployment's initContainer securityContext, not here in the values file anymore.
runAsGroup: 0

service:
type: ClusterIP
@@ -96,6 +99,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
1 change: 1 addition & 0 deletions helm-charts/common/tei/templates/configmap.yaml
@@ -32,3 +32,4 @@ data:
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
36 changes: 32 additions & 4 deletions helm-charts/common/tei/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "tei.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.EMBEDDING_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "tei.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
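The `replace "/" "--" .Values.EMBEDDING_MODEL_ID` expression in the chmod line reproduces the huggingface_hub cache layout, where a model ID `org/name` is stored under a `models--org--name` directory. The same mapping in sh, with an illustrative model ID (not necessarily the chart default):

```shell
# huggingface_hub stores model "org/name" under <cache>/models--org--name;
# this mirrors the chart's `replace "/" "--"` template expression.
EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"   # illustrative model ID
CACHE_NAME="models--$(printf '%s' "$EMBEDDING_MODEL_ID" | sed 's|/|--|g')"
echo "$CACHE_NAME"   # prints: models--BAAI--bge-base-en-v1.5
```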
containers:
- name: {{ .Chart.Name }}
envFrom:
@@ -42,11 +74,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
4 changes: 4 additions & 0 deletions helm-charts/common/tei/values.yaml
@@ -61,6 +61,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
@@ -110,6 +113,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
1 change: 1 addition & 0 deletions helm-charts/common/teirerank/templates/configmap.yaml
@@ -32,3 +32,4 @@ data:
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
32 changes: 32 additions & 0 deletions helm-charts/common/teirerank/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "teirerank.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.RERANK_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "teirerank.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.RERANK_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
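Rendered, the args block above becomes a short script run inside the init container. A sketch with placeholder values, echoed rather than executed so no token or network access is needed:

```shell
# Shape of the rendered model-downloader command. HF_TOKEN and MODEL_ID are
# placeholders here; in the pod both are injected via the chart's ConfigMap.
HF_TOKEN="hf_placeholder_token"
MODEL_ID="BAAI/bge-reranker-base"
CMD="huggingface-cli download --cache-dir /data --token $HF_TOKEN $MODEL_ID"
echo "$CMD"
```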
containers:
- name: {{ .Chart.Name }}
envFrom:
4 changes: 4 additions & 0 deletions helm-charts/common/teirerank/values.yaml
@@ -61,6 +61,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
@@ -110,6 +113,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
22 changes: 10 additions & 12 deletions helm-charts/common/tgi/README.md
@@ -9,20 +9,20 @@ To install the chart, run the following:
```console
cd GenAIInfra/helm-charts/common
export MODELDIR=/mnt/opea-models
export MODELNAME="bigscience/bloom-560m"
export MODELNAME="Intel/neural-chat-7b-v3-3"
export HFTOKEN="insert-your-huggingface-token-here"
helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
# To deploy on Gaudi enabled kubernetes cluster
# helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml
```

By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB.
By default, the tgi service will download the "Intel/neural-chat-7b-v3-3" model, which is about 54GB.

If you have already cached the model locally, you can pass it to the container as in this example:

MODELDIR=/mnt/opea-models

MODELNAME="/data/models--bigscience--bloom-560m"
MODELNAME="/data/models--Intel--neural-chat-7b-v3-3"

## Verify

@@ -41,12 +41,10 @@ curl http://localhost:2080/generate \

## Values

| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| LLM_MODEL_ID | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory |
| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | |
| image.tag | string | `"1.4"` | |
| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"`        | Model ID from https://huggingface.co/, or a pre-downloaded model directory                                                                                                                                              |
| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
| global.modelUseHostPath         | string | `""`                                 | Cached models directory; tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to the container as the /data directory. Setting this to null/empty forces a download.      |
| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
36 changes: 32 additions & 4 deletions helm-charts/common/tgi/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "tgi.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.LLM_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "tgi.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
containers:
- name: {{ .Chart.Name }}
envFrom:
@@ -42,11 +74,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/values.yaml
@@ -62,6 +62,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0
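The comment above refers to the init container's `chmod -R g+w` step: the downloaded model directory must be group writable so the serving container, running with the same `runAsGroup`, can create its lock files there. A small sketch of the effect, using a temporary directory in place of the /data volume:

```shell
# Demonstrates what `chmod -R g+w` buys the main container: group write
# access on the model cache directory. A temp dir stands in for /data.
DEMO_DIR=$(mktemp -d)
mkdir -p "$DEMO_DIR/models--demo--model"
chmod g-w "$DEMO_DIR/models--demo--model"    # simulate a non-group-writable cache
chmod -R g+w "$DEMO_DIR/models--demo--model" # what the init container does
GROUP_W=$(ls -ld "$DEMO_DIR/models--demo--model" | cut -c6)
echo "$GROUP_W"   # prints: w
rm -rf "$DEMO_DIR"
```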

service:
type: ClusterIP