Skip to content

Commit

Permalink
Merge branch 'main' into log
Browse files Browse the repository at this point in the history
  • Loading branch information
KfreeZ authored Sep 10, 2024
2 parents 7e8767b + 267d828 commit 5f606a9
Show file tree
Hide file tree
Showing 189 changed files with 31,832 additions and 312 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/manual-helm-cd-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
find "$CHARTS_DIR" -mindepth 1 -maxdepth 1 -type d ! -name "common" -exec basename {} \; | while read -r subfolder; do
for file in "$CHARTS_DIR/$subfolder"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -60,7 +60,7 @@ jobs:
find "$CHARTS_DIR/common" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | while read -r subfolder; do
for file in "$CHARTS_DIR/common/$subfolder"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -77,7 +77,7 @@ jobs:
for service in $service_list; do
for file in "$CHARTS_DIR/$service"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr-chart-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
for chart in ${e2e_charts}; do
for file in "$CHARTS_DIR/$chart"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -64,7 +64,7 @@ jobs:
for chart in ${common_charts}; do
for file in "$CHARTS_DIR/common/$chart"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand Down
45 changes: 40 additions & 5 deletions .github/workflows/pr-go-unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- microservices-connector/**
- kubernetes-addons/memory-bandwidth-exporter/**
- "!microservices-connector/helm/**"
- "!**.md"
- "!**.txt"
Expand All @@ -21,11 +22,45 @@ concurrency:
cancel-in-progress: true

env:
GOSRC_DIR: "microservices-connector"
GMC_DIR: "microservices-connector"
MBE_DIR: "kubernetes-addons/memory-bandwidth-exporter"

jobs:
job1:
name: Get-test-matrix
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
steps:
- name: Check out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get test matrix
id: get-test-matrix
run: |
set -xe
base_commit=${{ github.event.pull_request.base.sha }}
merged_commit=${{ github.event.pull_request.head.sha }}
run_matrix="["
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${GMC_DIR}/"; then
run_matrix="${run_matrix}\"${GMC_DIR}\","
fi
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${MBE_DIR}/"; then
run_matrix="${run_matrix}\"${MBE_DIR}\","
fi
run_matrix=${run_matrix%,}"]"
echo "run_matrix=${run_matrix}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
go-unittests:
needs: [job1]
runs-on: ubuntu-latest
if: ${{ always() && needs.job1.outputs.run_matrix != '[]' }}
strategy:
matrix:
gopath: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
permissions:
contents: write
actions: write
Expand All @@ -47,15 +82,15 @@ jobs:
with:
args: --timeout=10m
version: v1.55.2
working-directory: ${{ env.GOSRC_DIR }}
working-directory: ${{ matrix.gopath }}

- name: Run golangunit test
run: |
cd $GOSRC_DIR
cd ${{ matrix.gopath }}
make test
- name: Run tests and generate coverage
run: |
cd $GOSRC_DIR
cd ${{ matrix.gopath }}
go test -coverprofile=coverage.out $(go list ./... | grep -v /e2e)
../.github/workflows/scripts/go-coverage.sh
${{ github.workspace }}/.github/workflows/scripts/go-coverage.sh
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
*.out
**/Chart.lock
**/charts/*.tgz

bazel-*
compile_commands.json
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,31 @@ The GenAIInfra repository is organized under four main directories, which includ

## Prerequisite

GenAIInfra uses Kubernetes as the cloud native infrastructure. Please follow the steps below to prepare the Kubernetes environment.
GenAIInfra uses Kubernetes as the cloud native infrastructure. Follow the steps below to prepare the Kubernetes environment.

### Setup Kubernetes cluster

Please follow [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to setup Kubernetes. We recommend to use Kubernetes with version >= 1.27.
Follow the [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to set up Kubernetes. We recommend using Kubernetes version >= 1.27.

There are different methods to setup Kubernetes production cluster, such as [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), [kubespray](https://kubespray.io/), and [more](https://kubernetes.io/docs/setup/production-environment/tools/).

NOTE: We recommend using containerd as the container runtime during Kubernetes setup. Docker engine is also verified on Ubuntu 22.04 and above.

### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product:
### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product

The following steps are optional. They're only required if you want to run the workloads on Intel Gaudi product.

1. Please check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that environment meets the requirements.
1. Check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that the environment meets the requirements.

2. [Install Intel Gaudi software stack](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#driver-fw-install-bare).

3. [Install and setup container runtime](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-container-usage), based on the container runtime used by Kubernetes.

NOTE: Please make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.
NOTE: Make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.

4. [Install Intel Gaudi device plugin for Kubernetes](https://docs.habana.ai/en/latest/Orchestration/Gaudi_Kubernetes/Device_Plugin_for_Kubernetes.html).

Alternatively, Intel provides a base operator to manage the Gaudi software stack. Please refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.
Alternatively, Intel provides a base operator to manage the Gaudi software stack. Refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.

## Usages

Expand Down
13 changes: 13 additions & 0 deletions helm-charts/chatqna/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ name: chatqna
description: The Helm chart to deploy ChatQnA
type: application
dependencies:
- name: tgi
version: 1.0.0
alias: tgi-guardrails
repository: "file://../common/tgi"
condition: guardrails-usvc.enabled
- name: guardrails-usvc
version: 1.0.0
repository: "file://../common/guardrails-usvc"
condition: guardrails-usvc.enabled
- name: tgi
version: 1.0.0
repository: "file://../common/tgi"
Expand Down Expand Up @@ -33,5 +42,9 @@ dependencies:
- name: data-prep
version: 1.0.0
repository: "file://../common/data-prep"
- name: ui
alias: chatqna-ui
version: 1.0.0
repository: "file://../common/ui"
version: 1.0.0
appVersion: "v1.0"
32 changes: 10 additions & 22 deletions helm-charts/chatqna/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ helm dependency update chatqna
export HFTOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
# If you would like to use the traditional UI, please change the image as well as the containerport within the values
# append these at the end of the command "--set chatqna-ui.image.repository=opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173"
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
# To use Gaudi device
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
# To use Nvidia GPU
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
# To include guardrail component in chatqna on Xeon
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
# To include guardrail component in chatqna on Gaudi
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
```

### IMPORTANT NOTE
Expand All @@ -54,32 +60,14 @@ curl http://localhost:8888/v1/chatqna \

### Verify the workload through UI

UI need to get installed before accessing. Follow the steps below to build and install UI:
The UI has already been installed via the Helm chart. To access it, use the external IP of one of your Kubernetes nodes along with the NGINX port. You can find the NGINX port using the following command:

```bash
# expose endpoint of ChatQnA service and dataprep service
kubectl port-forward svc/chatqna --address 0.0.0.0 8888:8888
kubectl port-forward svc/chatqna-data-prep --address 0.0.0.0 6007:6007

# build and push the UI image if not exist
# skip these steps if the image already exists
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker/ui/
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
# push the image to your cluster, make sure the image exists on each node of your cluster
docker save -o ui.tar opea/chatqna-conversation-ui:latest
sudo ctr -n k8s.io image import ui.tar

# install UI using helm chart. Replace image tag if required
cd
cd GenAIInfra/helm-charts/
helm install ui common/chatqna-ui --set BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna",DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep",image.tag="latest"

# expose the endpoint of UI for verification
kubectl port-forward svc/ui --address 0.0.0.0 5174:5174
export port=$(kubectl get service chatqna-nginx --output='jsonpath={.spec.ports[0].nodePort}')
echo $port
```

Access `http://localhost:5174` to play with the ChatQnA workload through UI.
Open a browser to access `http://<k8s-node-ip-address>:${port}` to play with the ChatQnA workload.

## Values

Expand Down
46 changes: 46 additions & 0 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Helm values overlay for the chatqna chart: enables the guardrails pipeline
# and targets Intel Gaudi accelerators.
# Usage (per chart README): helm install chatqna chatqna ... -f chatqna/guardrails-gaudi-values.yaml

# Guardrails-enabled ChatQnA megaservice image.
image:
  repository: opea/chatqna-guardrails
  tag: "latest"

# guardrails related config
guardrails-usvc:
  # Also pulls in the tgi-guardrails dependency — both are gated on the
  # guardrails-usvc.enabled condition in Chart.yaml.
  enabled: true
  # Endpoint of the safety-guard TGI service. "{{ .Release.Name }}" is a Helm
  # template fragment inside a value — presumably rendered via tpl by the
  # chart templates; TODO confirm.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"

# gaudi related config
tei:
  image:
    # Habana Gaudi build of text-embeddings-inference.
    repository: ghcr.io/huggingface/tei-gaudi
    tag: synapse_1.16
  resources:
    limits:
      habana.ai/gaudi: 1  # one Gaudi accelerator per pod
  securityContext:
    readOnlyRootFilesystem: false

tgi:
  image:
    # Habana Gaudi build of text-generation-inference.
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.1"
  resources:
    limits:
      habana.ai/gaudi: 1
  # Quoted so they stay strings when passed through as env values.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  CUDA_GRAPHS: ""  # empty string — presumably disables CUDA graphs on Gaudi; confirm against tgi-gaudi docs

# Dedicated TGI instance serving the Llama Guard safety model
# (aliased tgi-guardrails in Chart.yaml).
tgi-guardrails:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.1"
  resources:
    limits:
      habana.ai/gaudi: 1
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  CUDA_GRAPHS: ""
14 changes: 14 additions & 0 deletions helm-charts/chatqna/guardrails-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Helm values overlay for the chatqna chart: enables the guardrails pipeline
# on CPU (Xeon).
# Usage (per chart README): helm install chatqna chatqna ... -f chatqna/guardrails-values.yaml

# Guardrails-enabled ChatQnA megaservice image.
image:
  repository: opea/chatqna-guardrails
  tag: "latest"

# guardrails related config
guardrails-usvc:
  # Also pulls in the tgi-guardrails dependency — both are gated on the
  # guardrails-usvc.enabled condition in Chart.yaml.
  enabled: true
  # "{{ .Release.Name }}" is a Helm template fragment inside a value —
  # presumably rendered via tpl by the chart templates; TODO confirm.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
# Dedicated TGI instance serving the Llama Guard safety model
# (aliased tgi-guardrails in Chart.yaml).
tgi-guardrails:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
31 changes: 15 additions & 16 deletions helm-charts/chatqna/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ metadata:
name: {{ include "chatqna.fullname" . }}
labels:
{{- include "chatqna.labels" . | nindent 4 }}
app: {{ include "chatqna.fullname" . }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "chatqna.selectorLabels" . | nindent 6 }}
app: {{ include "chatqna.fullname" . }}
template:
metadata:
{{- with .Values.podAnnotations }}
Expand All @@ -20,6 +22,7 @@ spec:
{{- end }}
labels:
{{- include "chatqna.selectorLabels" . | nindent 8 }}
app: {{ include "chatqna.fullname" . }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand All @@ -38,6 +41,8 @@ spec:
value: {{ .Release.Name }}-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: {{ .Release.Name }}-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: {{ .Release.Name }}-guardrails-usvc
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand All @@ -49,22 +54,6 @@ spec:
- name: chatqna
containerPort: {{ .Values.port }}
protocol: TCP
# startupProbe:
# httpGet:
# host: {{ .Release.Name }}-llm-uservice
# port: {{ index .Values "llm-uservice" "service" "port" }}
# path: /
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: {{ .Values.port }}
# readinessProbe:
# httpGet:
# path: /
# port: {{ .Values.port }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
Expand All @@ -82,3 +71,13 @@ spec:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.evenly_distributed }}
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
{{- include "chatqna.selectorLabels" . | nindent 14 }}
app: {{ include "chatqna.fullname" . }}
{{- end }}
Loading

0 comments on commit 5f606a9

Please sign in to comment.