diff --git a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh index 231baec5a..aed416c56 100755 --- a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh +++ b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh @@ -637,7 +637,7 @@ function validate_docsum() { export CLIENT_POD=$(kubectl get pod -n $DOCSUM_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $DOCSUM_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $DOCSUM_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_docsum.log + kubectl exec "$CLIENT_POD" -n $DOCSUM_NAMESPACE -- curl $accessUrl -X POST -d '{"type": "text", "messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_docsum.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "docsum failed, please check the logs in ${LOG_PATH}!" 
diff --git a/.github/workflows/scripts/e2e/gmc_xeon_test.sh b/.github/workflows/scripts/e2e/gmc_xeon_test.sh index b47ad5f1f..1c7caf9ea 100755 --- a/.github/workflows/scripts/e2e/gmc_xeon_test.sh +++ b/.github/workflows/scripts/e2e/gmc_xeon_test.sh @@ -658,7 +658,7 @@ function validate_docsum() { export CLIENT_POD=$(kubectl get pod -n $DOCSUM_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $DOCSUM_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='docsum')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $DOCSUM_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_docsum.log + kubectl exec "$CLIENT_POD" -n $DOCSUM_NAMESPACE -- curl $accessUrl -X POST -d '{"type": "text", "messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_docsum.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "docsum failed, please check the logs in ${LOG_PATH}!" 
diff --git a/.github/workflows/scripts/e2e/manifest_gaudi_test.sh b/.github/workflows/scripts/e2e/manifest_gaudi_test.sh index 40cadb884..4186fa5b2 100755 --- a/.github/workflows/scripts/e2e/manifest_gaudi_test.sh +++ b/.github/workflows/scripts/e2e/manifest_gaudi_test.sh @@ -86,7 +86,7 @@ function validate_docsum() { # Curl the DocSum LLM Service curl http://${ip_address}:${port}/v1/chat/docsum \ -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -d '{"type": "text", "messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log exit_code=$? if [ $exit_code -ne 0 ]; then diff --git a/.github/workflows/scripts/e2e/manifest_xeon_test.sh b/.github/workflows/scripts/e2e/manifest_xeon_test.sh index 78d9f3ce5..b66528e75 100755 --- a/.github/workflows/scripts/e2e/manifest_xeon_test.sh +++ b/.github/workflows/scripts/e2e/manifest_xeon_test.sh @@ -86,7 +86,7 @@ function validate_docsum() { # Curl the DocSum LLM Service curl http://${ip_address}:${port}/v1/chat/docsum \ -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -d '{"type": "text", "messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log exit_code=$? if [ $exit_code -ne 0 ]; then diff --git a/helm-charts/common/ui/templates/configmap.yaml b/helm-charts/common/ui/templates/configmap.yaml index 8757effb8..af6cf6fde 100644 --- a/helm-charts/common/ui/templates/configmap.yaml +++ b/helm-charts/common/ui/templates/configmap.yaml @@ -16,6 +16,7 @@ data: BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} {{- else if (contains "docsum-ui" .Values.image.repository) }} DOC_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + BACKEND_SERVICE_ENDPOINT: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} {{- else if (contains "docsum-react-ui" .Values.image.repository) }} VITE_DOC_SUM_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} {{- else if contains "chatqna-ui" .Values.image.repository }} diff --git a/helm-charts/docsum/Chart.yaml b/helm-charts/docsum/Chart.yaml index 2ffb549dd..95375c6f4 100644 --- a/helm-charts/docsum/Chart.yaml +++ b/helm-charts/docsum/Chart.yaml @@ -12,6 +12,9 @@ dependencies: - name: llm-uservice version: 1.0.0 repository: "file://../common/llm-uservice" + - name: whisper + version: 1.0.0 + repository: "file://../common/whisper" - name: ui version: 1.0.0 repository: "file://../common/ui" diff --git a/helm-charts/docsum/README.md b/helm-charts/docsum/README.md index 34ff1dcd2..d9f4035a1 100644 --- a/helm-charts/docsum/README.md +++ b/helm-charts/docsum/README.md @@ -34,8 +34,10 @@ Open another terminal and run the following command to verify the service if wor ```console curl http://localhost:8888/v1/docsum \ - -H 'Content-Type: application/json' \ - -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + -H 'Content-Type: multipart/form-data' \ + -F "type=text" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" ``` ### Verify the workload through UI diff --git a/helm-charts/docsum/templates/deployment.yaml b/helm-charts/docsum/templates/deployment.yaml index 73068b74b..33b58d886 100644 --- a/helm-charts/docsum/templates/deployment.yaml +++ b/helm-charts/docsum/templates/deployment.yaml @@ -39,8 +39,8 @@ spec: {{- else }} value: {{ .Release.Name }}-llm-uservice {{- end }} - #- name: MEGA_SERVICE_PORT - # value: {{ .Values.port }} + - name: DATA_SERVICE_HOST_IP + value: {{ .Release.Name }}-m2t securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" diff --git a/helm-charts/docsum/templates/m2t.yaml b/helm-charts/docsum/templates/m2t.yaml new file mode 100644 index 000000000..84d8d410c --- /dev/null +++ b/helm-charts/docsum/templates/m2t.yaml @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-m2t + labels: + {{- include "docsum.labels" . | nindent 4 }} + app: {{ .Release.Name }}-m2t +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "docsum.selectorLabels" . | nindent 6 }} + app: {{ .Release.Name }}-m2t + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "docsum.selectorLabels" . 
| nindent 8 }} + app: {{ .Release.Name }}-m2t + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }}-m2t + env: + - name: V2A_ENDPOINT + value: {{ .Release.Name }}-v2a:{{ .Values.v2a.service.port }} + - name: A2T_ENDPOINT + value: {{ .Release.Name }}-whisper:{{ .Values.whisper.service.port }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.m2t.image.repository }}:{{ .Values.m2t.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: m2t + containerPort: {{ .Values.m2t.port }} + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "docsum.selectorLabels" . | nindent 14 }} + app: {{ .Release.Name }}-m2t + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-m2t + labels: + {{- include "docsum.labels" . | nindent 4 }} +spec: + type: {{ .Values.m2t.service.type }} + ports: + - port: {{ .Values.m2t.service.port }} + targetPort: {{ .Values.m2t.port }} + protocol: TCP + name: m2t + selector: + {{- include "docsum.selectorLabels" . 
| nindent 4 }} + app: {{ .Release.Name }}-m2t diff --git a/helm-charts/docsum/templates/tests/test-pod.yaml b/helm-charts/docsum/templates/tests/test-pod.yaml index 50c5730b3..3c63d346b 100644 --- a/helm-charts/docsum/templates/tests/test-pod.yaml +++ b/helm-charts/docsum/templates/tests/test-pod.yaml @@ -21,6 +21,7 @@ spec: for ((i=1; i<=max_retry; i++)); do curl http://{{ include "docsum.fullname" . }}:{{ .Values.service.port }}/v1/docsum -sS --fail-with-body \ -H 'Content-Type: multipart/form-data' \ + -F "type=text" \ -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ -F "max_tokens=32" && break; curlcode=$? diff --git a/helm-charts/docsum/templates/v2a.yaml b/helm-charts/docsum/templates/v2a.yaml new file mode 100644 index 000000000..e4f107505 --- /dev/null +++ b/helm-charts/docsum/templates/v2a.yaml @@ -0,0 +1,89 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-v2a + labels: + {{- include "docsum.labels" . | nindent 4 }} + app: {{ .Release.Name }}-v2a +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "docsum.selectorLabels" . | nindent 6 }} + app: {{ .Release.Name }}-v2a + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "docsum.selectorLabels" . | nindent 8 }} + app: {{ .Release.Name }}-v2a + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }}-v2a + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.v2a.image.repository }}:{{ .Values.v2a.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: v2a + containerPort: {{ .Values.v2a.port }} + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "docsum.selectorLabels" . | nindent 14 }} + app: {{ .Release.Name }}-v2a + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-v2a + labels: + {{- include "docsum.labels" . | nindent 4 }} +spec: + type: {{ .Values.v2a.service.type }} + ports: + - port: {{ .Values.v2a.service.port }} + targetPort: {{ .Values.v2a.port }} + protocol: TCP + name: v2a + selector: + {{- include "docsum.selectorLabels" . | nindent 4 }} + app: {{ .Release.Name }}-v2a diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index a1b8d9ee0..1eb12333b 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -13,6 +13,24 @@ image: pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. tag: "latest" +v2a: + image: + repository: opea/dataprep-video2audio + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + port: 7078 + service: + type: ClusterIP + port: 7078 +m2t: + image: + repository: opea/dataprep-multimedia2text + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + port: 7079 + service: + type: ClusterIP + port: 7079 port: 8888 service: diff --git a/microservices-connector/config/manifests/docsum_svelte-ui.yaml b/microservices-connector/config/manifests/docsum_svelte-ui.yaml index a2f81e824..a55d78446 100644 --- a/microservices-connector/config/manifests/docsum_svelte-ui.yaml +++ b/microservices-connector/config/manifests/docsum_svelte-ui.yaml @@ -15,6 +15,7 @@ metadata: app.kubernetes.io/managed-by: Helm data: DOC_BASE_URL: "/v1/docsum" + BACKEND_SERVICE_ENDPOINT: "/v1/docsum" --- # Source: ui/templates/service.yaml # Copyright (C) 2024 Intel Corporation