From 97ade6e594750872874a1e562153be784e475764 Mon Sep 17 00:00:00 2001
From: Lianhao Lu <lianhao.lu@intel.com>
Date: Mon, 30 Dec 2024 05:11:33 +0000
Subject: [PATCH] docsum: reduce microservices in docsum

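Sync the docsum chart with the latest DocSum changes: remove the m2t and
v2a templates and their values, point the docsum deployment directly at
the llm-uservice and whisper subcharts, and add the MAX_INPUT_TOKENS and
MAX_TOTAL_TOKENS settings to the llm-uservice chart.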

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
---
 .../common/llm-uservice/ci-docsum-values.yaml |   6 +
 .../llm-uservice/templates/configmap.yaml     |   7 ++
 helm-charts/common/llm-uservice/values.yaml   |   2 +
 .../llm-uservice/variant_docsum-values.yaml   |   3 +
 helm-charts/docsum/gaudi-values.yaml          |   6 +-
 helm-charts/docsum/templates/deployment.yaml  |  14 +--
 helm-charts/docsum/templates/m2t.yaml         | 115 ------------------
 .../docsum/templates/tests/test-pod.yaml      |   6 +-
 helm-charts/docsum/templates/v2a.yaml         | 110 -----------------
 helm-charts/docsum/values.yaml                |  24 +---
 10 files changed, 38 insertions(+), 255 deletions(-)
 delete mode 100644 helm-charts/docsum/templates/m2t.yaml
 delete mode 100644 helm-charts/docsum/templates/v2a.yaml

diff --git a/helm-charts/common/llm-uservice/ci-docsum-values.yaml b/helm-charts/common/llm-uservice/ci-docsum-values.yaml
index b9f269c5a..da00321e2 100644
--- a/helm-charts/common/llm-uservice/ci-docsum-values.yaml
+++ b/helm-charts/common/llm-uservice/ci-docsum-values.yaml
@@ -4,5 +4,11 @@
 image:
   repository: opea/llm-docsum-tgi
   tag: "latest"
+
+MAX_INPUT_TOKENS: "2048"
+MAX_TOTAL_TOKENS: "4096"
+
 tgi:
   enabled: true
+  MAX_INPUT_LENGTH: "2048"
+  MAX_TOTAL_TOKENS: "4096"
diff --git a/helm-charts/common/llm-uservice/templates/configmap.yaml b/helm-charts/common/llm-uservice/templates/configmap.yaml
index bd49777dc..27cb37fac 100644
--- a/helm-charts/common/llm-uservice/templates/configmap.yaml
+++ b/helm-charts/common/llm-uservice/templates/configmap.yaml
@@ -19,7 +19,19 @@ data:
   vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
   {{- end }}
   {{- if .Values.LLM_MODEL_ID }}
+  # NOTE:
+  # delete LLM_MODEL once https://github.com/opea-project/GenAIComps/pull/1089 is merged
   LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}}
+  LLM_MODEL_ID: {{ .Values.LLM_MODEL_ID | quote}}
+  {{- end }}
+  {{- if contains "opea/llm-docsum" .Values.image.repository }}
+  {{- /* Emit each token limit only when it is set; values.yaml defaults both to "". */}}
+  {{- if .Values.MAX_INPUT_TOKENS }}
+  MAX_INPUT_TOKENS: {{ .Values.MAX_INPUT_TOKENS | quote }}
+  {{- end }}
+  {{- if .Values.MAX_TOTAL_TOKENS }}
+  MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
+  {{- end }}
   {{- end }}
   HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
   HF_HOME: "/tmp/.cache/huggingface"
diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
index 84ae7d32a..fc9f8788f 100644
--- a/helm-charts/common/llm-uservice/values.yaml
+++ b/helm-charts/common/llm-uservice/values.yaml
@@ -16,6 +16,8 @@ TGI_LLM_ENDPOINT: ""
 # For vllm, set the LLM_MODEL_ID the same as vllm sub chart
 vLLM_ENDPOINT: ""
 LLM_MODEL_ID: ""
+MAX_INPUT_TOKENS: ""
+MAX_TOTAL_TOKENS: ""
 
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.
diff --git a/helm-charts/common/llm-uservice/variant_docsum-values.yaml b/helm-charts/common/llm-uservice/variant_docsum-values.yaml
index 9e1f33bde..f0819540a 100644
--- a/helm-charts/common/llm-uservice/variant_docsum-values.yaml
+++ b/helm-charts/common/llm-uservice/variant_docsum-values.yaml
@@ -4,3 +4,6 @@
 image:
   repository: opea/llm-docsum-tgi
   tag: "latest"
+
+MAX_INPUT_TOKENS: "2048"
+MAX_TOTAL_TOKENS: "4096"
diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml
index e5367383a..bbd8f36ad 100644
--- a/helm-charts/docsum/gaudi-values.yaml
+++ b/helm-charts/docsum/gaudi-values.yaml
@@ -9,9 +9,11 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  ENABLE_HPU_GRAPH: true
+  LIMIT_HPU_GRAPH: true
+  USE_FLASH_ATTENTION: true
+  FLASH_ATTENTION_RECOMPUTE: true
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
diff --git a/helm-charts/docsum/templates/deployment.yaml b/helm-charts/docsum/templates/deployment.yaml
index cc2a281a4..486820715 100644
--- a/helm-charts/docsum/templates/deployment.yaml
+++ b/helm-charts/docsum/templates/deployment.yaml
@@ -35,13 +35,13 @@ spec:
         - name: {{ .Release.Name }}
           env:
             - name: LLM_SERVICE_HOST_IP
-              {{- if .Values.LLM_SERVICE_HOST_IP }}
-              value: {{ .Values.LLM_SERVICE_HOST_IP | quote}}
-              {{- else }}
-              value: {{ .Release.Name }}-llm-uservice
-              {{- end }}
-            - name: DATA_SERVICE_HOST_IP
-              value: {{ .Release.Name }}-m2t
+              value: {{ include "llm-uservice.fullname" (index .Subcharts "llm-uservice") }}
+            - name: LLM_SERVICE_PORT
+              value: {{ index .Values "llm-uservice" "service" "port" | quote }}
+            - name: ASR_SERVICE_HOST_IP
+              value: {{ include "whisper.fullname" (index .Subcharts "whisper") }}
+            - name: ASR_SERVICE_PORT
+              value: {{ index .Values "whisper" "service" "port" | quote }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
diff --git a/helm-charts/docsum/templates/m2t.yaml b/helm-charts/docsum/templates/m2t.yaml
deleted file mode 100644
index 583156e22..000000000
--- a/helm-charts/docsum/templates/m2t.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ .Release.Name }}-m2t
-  labels:
-    {{- include "docsum.labels" . | nindent 4 }}
-    app: {{ .Release.Name }}-m2t
-spec:
-  replicas: {{ .Values.replicaCount }}
-  selector:
-    matchLabels:
-      {{- include "docsum.selectorLabels" . | nindent 6 }}
-      app: {{ .Release.Name }}-m2t
-  template:
-    metadata:
-      {{- with .Values.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "docsum.selectorLabels" . | nindent 8 }}
-        app: {{ .Release.Name }}-m2t
-    spec:
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "docsum.serviceAccountName" . }}
-      securityContext:
-        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      containers:
-        - name: {{ .Release.Name }}-m2t
-          env:
-            - name: V2A_ENDPOINT
-              value: {{ .Release.Name }}-v2a:{{ .Values.v2a.service.port }}
-            - name: A2T_ENDPOINT
-              value: {{ .Release.Name }}-whisper:{{ .Values.whisper.service.port }}
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: "{{ .Values.m2t.image.repository }}:{{ .Values.m2t.image.tag | default .Chart.AppVersion }}"
-          {{- if .Values.image.pullPolicy }}
-          imagePullPolicy: {{ .Values.image.pullPolicy }}
-          {{- end }}
-          volumeMounts:
-            - mountPath: /tmp
-              name: tmp
-          ports:
-            - name: m2t
-              containerPort: {{ .Values.m2t.port }}
-              protocol: TCP
-          resources:
-            {{- toYaml .Values.resources | nindent 12 }}
-      volumes:
-        - name: tmp
-          emptyDir: {}
-      {{- with .Values.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- if .Values.evenly_distributed }}
-      topologySpreadConstraints:
-        - maxSkew: 1
-          topologyKey: kubernetes.io/hostname
-          whenUnsatisfiable: ScheduleAnyway
-          labelSelector:
-            matchLabels:
-              {{- include "docsum.selectorLabels" . | nindent 14 }}
-              app: {{ .Release.Name }}-m2t
-      {{- end }}
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ .Release.Name }}-m2t
-  labels:
-    {{- include "docsum.labels" . | nindent 4 }}
-    app: {{ .Release.Name }}-m2t
-spec:
-  type: {{ .Values.m2t.service.type }}
-  ports:
-    - port: {{ .Values.m2t.service.port }}
-      targetPort: {{ .Values.m2t.port }}
-      protocol: TCP
-      name: m2t
-  selector:
-    {{- include "docsum.selectorLabels" . | nindent 4 }}
-    app: {{ .Release.Name }}-m2t
----
-{{- if .Values.global.monitoring }}
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: {{ include "docsum.fullname" . }}-m2t
-  labels:
-    release: {{ .Values.global.prometheusRelease }}
-spec:
-  selector:
-    matchLabels:
-      {{- include "docsum.selectorLabels" . | nindent 6 }}
-      app: {{ .Release.Name }}-m2t
-  endpoints:
-  - port: m2t
-    interval: 5s
-{{- end }}
diff --git a/helm-charts/docsum/templates/tests/test-pod.yaml b/helm-charts/docsum/templates/tests/test-pod.yaml
index 3c63d346b..6a64874a7 100644
--- a/helm-charts/docsum/templates/tests/test-pod.yaml
+++ b/helm-charts/docsum/templates/tests/test-pod.yaml
@@ -21,9 +21,11 @@ spec:
           for ((i=1; i<=max_retry; i++)); do
             curl http://{{ include "docsum.fullname" . }}:{{ .Values.service.port }}/v1/docsum -sS --fail-with-body \
             -H 'Content-Type: multipart/form-data' \
-            -H "type=text" \
+            -F "type=text" \
             -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
-            -F "max_tokens=32" && break;
+            -F "max_tokens=32" \
+            -F "language=en" \
+            -F "stream=true" && break;
             curlcode=$?
             if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
           done;
diff --git a/helm-charts/docsum/templates/v2a.yaml b/helm-charts/docsum/templates/v2a.yaml
deleted file mode 100644
index 9057f415d..000000000
--- a/helm-charts/docsum/templates/v2a.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ .Release.Name }}-v2a
-  labels:
-    {{- include "docsum.labels" . | nindent 4 }}
-    app: {{ .Release.Name }}-v2a
-spec:
-  replicas: {{ .Values.replicaCount }}
-  selector:
-    matchLabels:
-      {{- include "docsum.selectorLabels" . | nindent 6 }}
-      app: {{ .Release.Name }}-v2a
-  template:
-    metadata:
-      {{- with .Values.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "docsum.selectorLabels" . | nindent 8 }}
-        app: {{ .Release.Name }}-v2a
-    spec:
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "docsum.serviceAccountName" . }}
-      securityContext:
-        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      containers:
-        - name: {{ .Release.Name }}-v2a
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: "{{ .Values.v2a.image.repository }}:{{ .Values.v2a.image.tag | default .Chart.AppVersion }}"
-          {{- if .Values.image.pullPolicy }}
-          imagePullPolicy: {{ .Values.image.pullPolicy }}
-          {{- end }}
-          volumeMounts:
-            - mountPath: /tmp
-              name: tmp
-          ports:
-            - name: v2a
-              containerPort: {{ .Values.v2a.port }}
-              protocol: TCP
-          resources:
-            {{- toYaml .Values.resources | nindent 12 }}
-      volumes:
-        - name: tmp
-          emptyDir: {}
-      {{- with .Values.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- if .Values.evenly_distributed }}
-      topologySpreadConstraints:
-        - maxSkew: 1
-          topologyKey: kubernetes.io/hostname
-          whenUnsatisfiable: ScheduleAnyway
-          labelSelector:
-            matchLabels:
-              {{- include "docsum.selectorLabels" . | nindent 14 }}
-              app: {{ .Release.Name }}-v2a
-      {{- end }}
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ .Release.Name }}-v2a
-  labels:
-    {{- include "docsum.labels" . | nindent 4 }}
-    app: {{ .Release.Name }}-v2a
-spec:
-  type: {{ .Values.v2a.service.type }}
-  ports:
-    - port: {{ .Values.v2a.service.port }}
-      targetPort: {{ .Values.v2a.port }}
-      protocol: TCP
-      name: v2a
-  selector:
-    {{- include "docsum.selectorLabels" . | nindent 4 }}
-    app: {{ .Release.Name }}-v2a
----
-{{- if .Values.global.monitoring }}
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: {{ include "docsum.fullname" . }}-v2a
-  labels:
-    release: {{ .Values.global.prometheusRelease }}
-spec:
-  selector:
-    matchLabels:
-      {{- include "docsum.selectorLabels" . | nindent 6 }}
-      app: {{ .Release.Name }}-v2a
-  endpoints:
-  - port: v2a
-    interval: 5s
-{{- end }}
diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml
index a473a15f3..406746165 100644
--- a/helm-charts/docsum/values.yaml
+++ b/helm-charts/docsum/values.yaml
@@ -6,7 +6,6 @@
 # Declare variables to be passed into your templates.
 
 replicaCount: 1
-LLM_SERVICE_HOST_IP: ""
 
 image:
   repository: opea/docsum
@@ -14,24 +13,6 @@ image:
   # pullPolicy: ""
   # Overrides the image tag whose default is the chart appVersion.
   tag: "latest"
-v2a:
-  image:
-    repository: opea/dataprep-video2audio
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: "latest"
-  port: 7078
-  service:
-    type: ClusterIP
-    port: 7078
-m2t:
-  image:
-    repository: opea/dataprep-multimedia2text
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: "latest"
-  port: 7079
-  service:
-    type: ClusterIP
-    port: 7079
 
 imagePullSecrets: []
 nameOverride: ""
@@ -83,10 +64,15 @@ affinity: {}
 llm-uservice:
   image:
     repository: opea/llm-docsum-tgi
+  MAX_INPUT_TOKENS: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
 
 # To override values in subchart tgi
 tgi:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
 
 docsum-ui:
   image: