Skip to content

Commit

Permalink
docsum: reduce microservices in docsum
Browse files Browse the repository at this point in the history
Update latest changes to docsum

Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao committed Dec 30, 2024
1 parent 590991b commit 97ade6e
Show file tree
Hide file tree
Showing 10 changed files with 38 additions and 255 deletions.
6 changes: 6 additions & 0 deletions helm-charts/common/llm-uservice/ci-docsum-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,11 @@
image:
repository: opea/llm-docsum-tgi
tag: "latest"

MAX_INPUT_TOKENS: 2048
MAX_TOTAL_TOKENS: 4096

tgi:
enabled: true
MAX_INPUT_LENGTH: 2048
MAX_TOTAL_TOKENS: 4096
7 changes: 7 additions & 0 deletions helm-charts/common/llm-uservice/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ data:
vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
{{- end }}
{{- if .Values.LLM_MODEL_ID }}
# NOTE: LLM_MODEL is set to the same value as LLM_MODEL_ID below.
# Delete LLM_MODEL once https://github.com/opea-project/GenAIComps/pull/1089 is merged.
LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}}
LLM_MODEL_ID: {{ .Values.LLM_MODEL_ID | quote}}
{{- end }}
{{- if contains "opea/llm-docsum" .Values.image.repository }}
MAX_INPUT_TOKENS: {{ .Values.MAX_INPUT_TOKENS | quote }}
MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
{{- end }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
HF_HOME: "/tmp/.cache/huggingface"
Expand Down
2 changes: 2 additions & 0 deletions helm-charts/common/llm-uservice/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ TGI_LLM_ENDPOINT: ""
# For vLLM, set LLM_MODEL_ID to the same value as in the vllm subchart
vLLM_ENDPOINT: ""
LLM_MODEL_ID: ""
MAX_INPUT_TOKENS: ""
MAX_TOTAL_TOKENS: ""

# Set it to a non-empty string, such as "true", to enable the logging facility;
# otherwise, keep it as "" to disable it.
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/llm-uservice/variant_docsum-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
image:
repository: opea/llm-docsum-tgi
tag: "latest"

MAX_INPUT_TOKENS: 2048
MAX_TOTAL_TOKENS: 4096
6 changes: 4 additions & 2 deletions helm-charts/docsum/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
14 changes: 7 additions & 7 deletions helm-charts/docsum/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ spec:
- name: {{ .Release.Name }}
env:
- name: LLM_SERVICE_HOST_IP
{{- if .Values.LLM_SERVICE_HOST_IP }}
value: {{ .Values.LLM_SERVICE_HOST_IP | quote}}
{{- else }}
value: {{ .Release.Name }}-llm-uservice
{{- end }}
- name: DATA_SERVICE_HOST_IP
value: {{ .Release.Name }}-m2t
value: {{ include "llm-uservice.fullname" (index .Subcharts "llm-uservice") }}
- name: LLM_SERVICE_PORT
value: {{ index .Values "llm-uservice" "service" "port" | quote }}
- name: ASR_SERVICE_HOST_IP
value: {{ include "whisper.fullname" (index .Subcharts "whisper") }}
- name: ASR_SERVICE_PORT
value: {{ index .Values "whisper" "service" "port" | quote }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand Down
115 changes: 0 additions & 115 deletions helm-charts/docsum/templates/m2t.yaml

This file was deleted.

6 changes: 4 additions & 2 deletions helm-charts/docsum/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ spec:
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "docsum.fullname" . }}:{{ .Values.service.port }}/v1/docsum -sS --fail-with-body \
-H 'Content-Type: multipart/form-data' \
-H "type=text" \
-F "type=text" \
-F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
-F "max_tokens=32" && break;
-F "max_tokens=32" \
-F "language=en" \
-F "stream=true" && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
Expand Down
110 changes: 0 additions & 110 deletions helm-charts/docsum/templates/v2a.yaml

This file was deleted.

24 changes: 5 additions & 19 deletions helm-charts/docsum/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,13 @@
# Declare variables to be passed into your templates.

replicaCount: 1
LLM_SERVICE_HOST_IP: ""

image:
repository: opea/docsum
# Uncomment the following line to set the desired image pull policy, if needed, to one of Always, IfNotPresent, or Never.
# pullPolicy: ""
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"
v2a:
image:
repository: opea/dataprep-video2audio
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"
port: 7078
service:
type: ClusterIP
port: 7078
m2t:
image:
repository: opea/dataprep-multimedia2text
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"
port: 7079
service:
type: ClusterIP
port: 7079

imagePullSecrets: []
nameOverride: ""
Expand Down Expand Up @@ -83,10 +64,15 @@ affinity: {}
llm-uservice:
image:
repository: opea/llm-docsum-tgi
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "2048"
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

# To override values in subchart tgi
tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"

docsum-ui:
image:
Expand Down

0 comments on commit 97ade6e

Please sign in to comment.