docsum: reduce microservices in docsum

Update docsum with the latest changes.

Signed-off-by: Lianhao Lu <[email protected]>
opea-project · Dec 30, 2024 · commit 97ade6e
1 parent 590991b
commit 97ade6e
Show file tree

Hide file tree

Showing 10 changed files with 38 additions and 255 deletions.
diff --git a/helm-charts/common/llm-uservice/ci-docsum-values.yaml b/helm-charts/common/llm-uservice/ci-docsum-values.yaml
@@ -4,5 +4,11 @@
 image:
   repository: opea/llm-docsum-tgi
   tag: "latest"
+
+MAX_INPUT_TOKENS: 2048
+MAX_TOTAL_TOKENS: 4096
+
 tgi:
   enabled: true
+  MAX_INPUT_LENGTH: 2048
+  MAX_TOTAL_TOKENS: 4096
diff --git a/helm-charts/common/llm-uservice/templates/configmap.yaml b/helm-charts/common/llm-uservice/templates/configmap.yaml
@@ -19,7 +19,14 @@ data:
   vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
   {{- end }}
   {{- if .Values.LLM_MODEL_ID }}
+  # NOTE:
+  # delete LLM_MODEL once https://github.com/opea-project/GenAIComps/pull/1089 is merged
   LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}}
+  LLM_MODEL_ID: {{ .Values.LLM_MODEL_ID | quote}}
+  {{- end }}
+  {{- if contains "opea/llm-docsum" .Values.image.repository }}
+  MAX_INPUT_TOKENS: {{ .Values.MAX_INPUT_TOKENS | quote }}
+  MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
   {{- end }}
   HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
   HF_HOME: "/tmp/.cache/huggingface"

diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
@@ -16,6 +16,8 @@ TGI_LLM_ENDPOINT: ""
 # For vllm, set the LLM_MODEL_ID the same as vllm sub chart
 vLLM_ENDPOINT: ""
 LLM_MODEL_ID: ""
+MAX_INPUT_TOKENS: ""
+MAX_TOTAL_TOKENS: ""
 
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.

diff --git a/helm-charts/common/llm-uservice/variant_docsum-values.yaml b/helm-charts/common/llm-uservice/variant_docsum-values.yaml
@@ -4,3 +4,6 @@
 image:
   repository: opea/llm-docsum-tgi
   tag: "latest"
+
+MAX_INPUT_TOKENS: 2048
+MAX_TOTAL_TOKENS: 4096
diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml
@@ -9,9 +9,11 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  ENABLE_HPU_GRAPH: true
+  LIMIT_HPU_GRAPH: true
+  USE_FLASH_ATTENTION: true
+  FLASH_ATTENTION_RECOMPUTE: true
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5

diff --git a/helm-charts/docsum/templates/deployment.yaml b/helm-charts/docsum/templates/deployment.yaml
@@ -35,13 +35,13 @@ spec:
         - name: {{ .Release.Name }}
           env:
             - name: LLM_SERVICE_HOST_IP
-              {{- if .Values.LLM_SERVICE_HOST_IP }}
-              value: {{ .Values.LLM_SERVICE_HOST_IP | quote}}
-              {{- else }}
-              value: {{ .Release.Name }}-llm-uservice
-              {{- end }}
-            - name: DATA_SERVICE_HOST_IP
-              value: {{ .Release.Name }}-m2t
+              value: {{ include "llm-uservice.fullname" (index .Subcharts "llm-uservice") }}
+            - name: LLM_SERVICE_PORT
+              value: {{ index .Values "llm-uservice" "service" "port" | quote }}
+            - name: ASR_SERVICE_HOST_IP
+              value: {{ include "whisper.fullname" (index .Subcharts "whisper") }}
+            - name: ASR_SERVICE_PORT
+              value: {{ index .Values "whisper" "service" "port" | quote }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"

diff --git a/helm-charts/docsum/templates/m2t.yaml b/helm-charts/docsum/templates/m2t.yaml
diff --git a/helm-charts/docsum/templates/tests/test-pod.yaml b/helm-charts/docsum/templates/tests/test-pod.yaml
@@ -21,9 +21,11 @@ spec:
           for ((i=1; i<=max_retry; i++)); do
             curl http://{{ include "docsum.fullname" . }}:{{ .Values.service.port }}/v1/docsum -sS --fail-with-body \
             -H 'Content-Type: multipart/form-data' \
-            -H "type=text" \
+            -F "type=text" \
             -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
-            -F "max_tokens=32" && break;
+            -F "max_tokens=32" \
+            -F "language=en" \
+            -F "stream=true" && break;
             curlcode=$?
             if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
           done;

diff --git a/helm-charts/docsum/templates/v2a.yaml b/helm-charts/docsum/templates/v2a.yaml
diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml
@@ -6,32 +6,13 @@
 # Declare variables to be passed into your templates.
 
 replicaCount: 1
-LLM_SERVICE_HOST_IP: ""
 
 image:
   repository: opea/docsum
   # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
   # pullPolicy: ""
   # Overrides the image tag whose default is the chart appVersion.
   tag: "latest"
-v2a:
-  image:
-    repository: opea/dataprep-video2audio
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: "latest"
-  port: 7078
-  service:
-    type: ClusterIP
-    port: 7078
-m2t:
-  image:
-    repository: opea/dataprep-multimedia2text
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: "latest"
-  port: 7079
-  service:
-    type: ClusterIP
-    port: 7079
 
 imagePullSecrets: []
 nameOverride: ""
@@ -83,10 +64,15 @@ affinity: {}
 llm-uservice:
   image:
     repository: opea/llm-docsum-tgi
+  MAX_INPUT_TOKENS: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
 
 # To override values in subchart tgi
 tgi:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
 
 docsum-ui:
   image: