From 97ade6e594750872874a1e562153be784e475764 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Mon, 30 Dec 2024 05:11:33 +0000 Subject: [PATCH] docsum: reduce microservices in docsum Update latest changes to docsum Signed-off-by: Lianhao Lu --- .../common/llm-uservice/ci-docsum-values.yaml | 6 + .../llm-uservice/templates/configmap.yaml | 7 ++ helm-charts/common/llm-uservice/values.yaml | 2 + .../llm-uservice/variant_docsum-values.yaml | 3 + helm-charts/docsum/gaudi-values.yaml | 6 +- helm-charts/docsum/templates/deployment.yaml | 14 +-- helm-charts/docsum/templates/m2t.yaml | 115 ------------------ .../docsum/templates/tests/test-pod.yaml | 6 +- helm-charts/docsum/templates/v2a.yaml | 110 ----------------- helm-charts/docsum/values.yaml | 24 +--- 10 files changed, 38 insertions(+), 255 deletions(-) delete mode 100644 helm-charts/docsum/templates/m2t.yaml delete mode 100644 helm-charts/docsum/templates/v2a.yaml diff --git a/helm-charts/common/llm-uservice/ci-docsum-values.yaml b/helm-charts/common/llm-uservice/ci-docsum-values.yaml index b9f269c5a..da00321e2 100644 --- a/helm-charts/common/llm-uservice/ci-docsum-values.yaml +++ b/helm-charts/common/llm-uservice/ci-docsum-values.yaml @@ -4,5 +4,11 @@ image: repository: opea/llm-docsum-tgi tag: "latest" + +MAX_INPUT_TOKENS: 2048 +MAX_TOTAL_TOKENS: 4096 + tgi: enabled: true + MAX_INPUT_LENGTH: 2048 + MAX_TOTAL_TOKENS: 4096 diff --git a/helm-charts/common/llm-uservice/templates/configmap.yaml b/helm-charts/common/llm-uservice/templates/configmap.yaml index bd49777dc..27cb37fac 100644 --- a/helm-charts/common/llm-uservice/templates/configmap.yaml +++ b/helm-charts/common/llm-uservice/templates/configmap.yaml @@ -19,7 +19,14 @@ data: vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm" {{- end }} {{- if .Values.LLM_MODEL_ID }} + # NOTE: + # delete LLM_MODEL once https://github.com/opea-project/GenAIComps/pull/1089 is merged LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}} + LLM_MODEL_ID: {{ .Values.LLM_MODEL_ID | quote}} + {{- end }} + {{- if contains "opea/llm-docsum" .Values.image.repository }} + MAX_INPUT_TOKENS: {{ .Values.MAX_INPUT_TOKENS | quote }} + MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }} {{- end }} HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} HF_HOME: "/tmp/.cache/huggingface" diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml index 84ae7d32a..fc9f8788f 100644 --- a/helm-charts/common/llm-uservice/values.yaml +++ b/helm-charts/common/llm-uservice/values.yaml @@ -16,6 +16,8 @@ TGI_LLM_ENDPOINT: "" # For vllm, set the LLM_MODEL_ID the same as vllm sub chart vLLM_ENDPOINT: "" LLM_MODEL_ID: "" +MAX_INPUT_TOKENS: "" +MAX_TOTAL_TOKENS: "" # Set it as a non-null string, such as true, if you want to enable logging facility, # otherwise, keep it as "" to disable it. diff --git a/helm-charts/common/llm-uservice/variant_docsum-values.yaml b/helm-charts/common/llm-uservice/variant_docsum-values.yaml index 9e1f33bde..f0819540a 100644 --- a/helm-charts/common/llm-uservice/variant_docsum-values.yaml +++ b/helm-charts/common/llm-uservice/variant_docsum-values.yaml @@ -4,3 +4,6 @@ image: repository: opea/llm-docsum-tgi tag: "latest" + +MAX_INPUT_TOKENS: 2048 +MAX_TOTAL_TOKENS: 4096 diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml index e5367383a..bbd8f36ad 100644 --- a/helm-charts/docsum/gaudi-values.yaml +++ b/helm-charts/docsum/gaudi-values.yaml @@ -9,9 +9,11 @@ tgi: resources: limits: habana.ai/gaudi: 1 - MAX_INPUT_LENGTH: "1024" - MAX_TOTAL_TOKENS: "2048" CUDA_GRAPHS: "" + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true livenessProbe: initialDelaySeconds: 5 periodSeconds: 5 diff --git a/helm-charts/docsum/templates/deployment.yaml b/helm-charts/docsum/templates/deployment.yaml index cc2a281a4..486820715 100644 --- a/helm-charts/docsum/templates/deployment.yaml +++ b/helm-charts/docsum/templates/deployment.yaml @@ -35,13 +35,13 @@ spec: - name: {{ .Release.Name }} env: - name: LLM_SERVICE_HOST_IP - {{- if .Values.LLM_SERVICE_HOST_IP }} - value: {{ .Values.LLM_SERVICE_HOST_IP | quote}} - {{- else }} - value: {{ .Release.Name }}-llm-uservice - {{- end }} - - name: DATA_SERVICE_HOST_IP - value: {{ .Release.Name }}-m2t + value: {{ include "llm-uservice.fullname" (index .Subcharts "llm-uservice") }} + - name: LLM_SERVICE_PORT + value: {{ index .Values "llm-uservice" "service" "port" | quote }} + - name: ASR_SERVICE_HOST_IP + value: {{ include "whisper.fullname" (index .Subcharts "whisper") }} + - name: ASR_SERVICE_PORT + value: {{ index .Values "whisper" "service" "port" | quote }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" diff --git a/helm-charts/docsum/templates/m2t.yaml b/helm-charts/docsum/templates/m2t.yaml deleted file mode 100644 index 583156e22..000000000 --- a/helm-charts/docsum/templates/m2t.yaml +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-m2t - labels: - {{- include "docsum.labels" . | nindent 4 }} - app: {{ .Release.Name }}-m2t -spec: - replicas: {{ .Values.replicaCount }} - selector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 6 }} - app: {{ .Release.Name }}-m2t - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "docsum.selectorLabels" . | nindent 8 }} - app: {{ .Release.Name }}-m2t - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "docsum.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: {{ .Release.Name }}-m2t - env: - - name: V2A_ENDPOINT - value: {{ .Release.Name }}-v2a:{{ .Values.v2a.service.port }} - - name: A2T_ENDPOINT - value: {{ .Release.Name }}-whisper:{{ .Values.whisper.service.port }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.m2t.image.repository }}:{{ .Values.m2t.image.tag | default .Chart.AppVersion }}" - {{- if .Values.image.pullPolicy }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- end }} - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: m2t - containerPort: {{ .Values.m2t.port }} - protocol: TCP - resources: - {{- toYaml .Values.resources | nindent 12 }} - volumes: - - name: tmp - emptyDir: {} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if .Values.evenly_distributed }} - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 14 }} - app: {{ .Release.Name }}-m2t - {{- end }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-m2t - labels: - {{- include "docsum.labels" . | nindent 4 }} - app: {{ .Release.Name }}-m2t -spec: - type: {{ .Values.m2t.service.type }} - ports: - - port: {{ .Values.m2t.service.port }} - targetPort: {{ .Values.m2t.port }} - protocol: TCP - name: m2t - selector: - {{- include "docsum.selectorLabels" . | nindent 4 }} - app: {{ .Release.Name }}-m2t ---- -{{- if .Values.global.monitoring }} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: {{ include "docsum.fullname" . }}-m2t - labels: - release: {{ .Values.global.prometheusRelease }} -spec: - selector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 6 }} - app: {{ .Release.Name }}-m2t - endpoints: - - port: m2t - interval: 5s -{{- end }} diff --git a/helm-charts/docsum/templates/tests/test-pod.yaml b/helm-charts/docsum/templates/tests/test-pod.yaml index 3c63d346b..6a64874a7 100644 --- a/helm-charts/docsum/templates/tests/test-pod.yaml +++ b/helm-charts/docsum/templates/tests/test-pod.yaml @@ -21,9 +21,11 @@ spec: for ((i=1; i<=max_retry; i++)); do curl http://{{ include "docsum.fullname" . }}:{{ .Values.service.port }}/v1/docsum -sS --fail-with-body \ -H 'Content-Type: multipart/form-data' \ - -H "type=text" \ + -F "type=text" \ -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ - -F "max_tokens=32" && break; + -F "max_tokens=32" \ + -F "language=en" \ + -F "stream=true" && break; curlcode=$? if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; done; diff --git a/helm-charts/docsum/templates/v2a.yaml b/helm-charts/docsum/templates/v2a.yaml deleted file mode 100644 index 9057f415d..000000000 --- a/helm-charts/docsum/templates/v2a.yaml +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-v2a - labels: - {{- include "docsum.labels" . | nindent 4 }} - app: {{ .Release.Name }}-v2a -spec: - replicas: {{ .Values.replicaCount }} - selector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 6 }} - app: {{ .Release.Name }}-v2a - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "docsum.selectorLabels" . | nindent 8 }} - app: {{ .Release.Name }}-v2a - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "docsum.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: {{ .Release.Name }}-v2a - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.v2a.image.repository }}:{{ .Values.v2a.image.tag | default .Chart.AppVersion }}" - {{- if .Values.image.pullPolicy }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- end }} - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: v2a - containerPort: {{ .Values.v2a.port }} - protocol: TCP - resources: - {{- toYaml .Values.resources | nindent 12 }} - volumes: - - name: tmp - emptyDir: {} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if .Values.evenly_distributed }} - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 14 }} - app: {{ .Release.Name }}-v2a - {{- end }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-v2a - labels: - {{- include "docsum.labels" . | nindent 4 }} - app: {{ .Release.Name }}-v2a -spec: - type: {{ .Values.v2a.service.type }} - ports: - - port: {{ .Values.v2a.service.port }} - targetPort: {{ .Values.v2a.port }} - protocol: TCP - name: v2a - selector: - {{- include "docsum.selectorLabels" . | nindent 4 }} - app: {{ .Release.Name }}-v2a ---- -{{- if .Values.global.monitoring }} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: {{ include "docsum.fullname" . }}-v2a - labels: - release: {{ .Values.global.prometheusRelease }} -spec: - selector: - matchLabels: - {{- include "docsum.selectorLabels" . | nindent 6 }} - app: {{ .Release.Name }}-v2a - endpoints: - - port: v2a - interval: 5s -{{- end }} diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index a473a15f3..406746165 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -6,7 +6,6 @@ # Declare variables to be passed into your templates. replicaCount: 1 -LLM_SERVICE_HOST_IP: "" image: repository: opea/docsum @@ -14,24 +13,6 @@ image: # pullPolicy: "" # Overrides the image tag whose default is the chart appVersion. tag: "latest" -v2a: - image: - repository: opea/dataprep-video2audio - # Overrides the image tag whose default is the chart appVersion. - tag: "latest" - port: 7078 - service: - type: ClusterIP - port: 7078 -m2t: - image: - repository: opea/dataprep-multimedia2text - # Overrides the image tag whose default is the chart appVersion. - tag: "latest" - port: 7079 - service: - type: ClusterIP - port: 7079 imagePullSecrets: [] nameOverride: "" @@ -83,10 +64,15 @@ affinity: {} llm-uservice: image: repository: opea/llm-docsum-tgi + MAX_INPUT_TOKENS: "1024" + MAX_TOTAL_TOKENS: "2048" + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # To override values in subchart tgi tgi: LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" docsum-ui: image: