Merge branch 'opea-project:main' into main
Signed-off-by: leslieluyu <[email protected]>
leslieluyu committed Sep 9, 2024
2 parents 91d6793 + af9e1b6 commit 0a87a34
Showing 148 changed files with 10,995 additions and 219 deletions.
48 changes: 43 additions & 5 deletions .github/workflows/pr-go-unittests.yaml
@@ -9,6 +9,7 @@ on:
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- microservices-connector/**
- kubernetes-addons/memory-bandwidth-exporter/**
- "!microservices-connector/helm/**"
- "!**.md"
- "!**.txt"
@@ -21,11 +22,45 @@ concurrency:
cancel-in-progress: true

env:
GOSRC_DIR: "microservices-connector"
GMC_DIR: "microservices-connector"
MBE_DIR: "kubernetes-addons/memory-bandwidth-exporter"

jobs:
job1:
name: Get-test-matrix
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get test matrix
id: get-test-matrix
run: |
set -xe
base_commit=${{ github.event.pull_request.base.sha }}
merged_commit=${{ github.event.pull_request.head.sha }}
run_matrix="["
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${GMC_DIR}/"; then
run_matrix="${run_matrix}\"${GMC_DIR}\","
fi
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${MBE_DIR}/"; then
run_matrix="${run_matrix}\"${MBE_DIR}\","
fi
run_matrix=${run_matrix%,}"]"
echo "run_matrix=${run_matrix}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
go-unittests:
needs: [job1]
runs-on: ubuntu-latest
if: always() && ${{ needs.job1.outputs.run_matrix.length }} > 0
strategy:
matrix:
gopath: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
permissions:
contents: write
actions: write
@@ -47,15 +82,18 @@ jobs:
with:
args: --timeout=10m
version: v1.55.2
working-directory: ${{ env.GOSRC_DIR }}
working-directory: ${{ matrix.gopath }}

- name: Run golangunit test
run: |
cd $GOSRC_DIR
cd ${{ matrix.gopath }}
make test
- name: Run tests and generate coverage
run: |
cd $GOSRC_DIR
if [ "${{ matrix.gopath }}" == "${MBE_DIR}" ]; then
exit 0
fi
cd ${{ matrix.gopath }}
go test -coverprofile=coverage.out $(go list ./... | grep -v /e2e)
../.github/workflows/scripts/go-coverage.sh
${{ github.workspace }}/.github/workflows/scripts/go-coverage.sh
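The matrix-construction logic added in the `Get-test-matrix` job above can be sketched outside the workflow. This is a hedged sketch: the `changed_files` list below stands in for the real `git diff --name-only ${base_commit} ${merged_commit}` output, which is not available outside a pull-request run.

```shell
# Sketch of the run_matrix construction from the workflow above.
# `changed_files` is a hypothetical stand-in for `git diff --name-only` output.
GMC_DIR="microservices-connector"
MBE_DIR="kubernetes-addons/memory-bandwidth-exporter"
changed_files="microservices-connector/cmd/main.go
kubernetes-addons/memory-bandwidth-exporter/collector.go"

run_matrix="["
# Append each watched directory whose files changed, as a quoted JSON string.
if echo "$changed_files" | grep -q "^${GMC_DIR}/"; then
  run_matrix="${run_matrix}\"${GMC_DIR}\","
fi
if echo "$changed_files" | grep -q "^${MBE_DIR}/"; then
  run_matrix="${run_matrix}\"${MBE_DIR}\","
fi
# Drop the trailing comma and close the JSON array.
run_matrix=${run_matrix%,}"]"
echo "$run_matrix"
# → ["microservices-connector","kubernetes-addons/memory-bandwidth-exporter"]
```

The resulting string is a JSON array precisely so that the `go-unittests` job can feed it to `fromJSON()` and fan out one matrix leg per changed directory.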
3 changes: 3 additions & 0 deletions .gitignore
@@ -2,3 +2,6 @@
*.out
**/Chart.lock
**/charts/*.tgz

bazel-*
compile_commands.json
12 changes: 6 additions & 6 deletions README.md
@@ -16,31 +16,31 @@ The GenAIInfra repository is organized under four main directories, which includ

## Prerequisite

GenAIInfra uses Kubernetes as the cloud native infrastructure. Please follow the steps below to prepare the Kubernetes environment.
GenAIInfra uses Kubernetes as the cloud native infrastructure. Follow the steps below to prepare the Kubernetes environment.

### Setup Kubernetes cluster

Please follow [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to setup Kubernetes. We recommend to use Kubernetes with version >= 1.27.
Follow [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to setup Kubernetes. We recommend to use Kubernetes with version >= 1.27.

There are different methods to setup Kubernetes production cluster, such as [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), [kubespray](https://kubespray.io/), and [more](https://kubernetes.io/docs/setup/production-environment/tools/).

NOTE: We recommend to use containerd when choosing the container runtime during Kubernetes setup. Docker engine is also verified on Ubuntu 22.04 and above.

### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product:
### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product

The following steps are optional. They're only required if you want to run the workloads on Intel Gaudi product.

1. Please check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that environment meets the requirements.
1. Check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that environment meets the requirements.

2. [Install Intel Gaudi software stack](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#driver-fw-install-bare).

3. [Install and setup container runtime](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-container-usage), based on the container runtime used by Kubernetes.

NOTE: Please make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.
NOTE: Make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.

4. [Install Intel Gaudi device plugin for Kubernetes](https://docs.habana.ai/en/latest/Orchestration/Gaudi_Kubernetes/Device_Plugin_for_Kubernetes.html).

Alternatively, Intel provides a base operator to manage the Gaudi software stack. Please refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.
Alternatively, Intel provides a base operator to manage the Gaudi software stack. Refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.

## Usages

20 changes: 13 additions & 7 deletions authN-authZ/auth-istio/README.md
@@ -58,12 +58,12 @@ In this example, we setup rules that only users with JWT token issued by "testin
```sh
# make sure running under authN-authZ/auth-istio folder
# apply the yaml to request authentication using JWT token
kubectl apply -f $(pwd)/$(DEPLOY_METHOD)/chatQnA_authZ_fakejwt.yaml -n chatqa
kubectl apply -f $(pwd)/$DEPLOY_METHOD/chatQnA_authZ_fakejwt.yaml -n chatqa

# apply the yaml file to request that only JWT token with
# issuer & sub == "[email protected]" and groups belongs to group1
# can access the endpoint of chatQnA service
kubectl apply -f $(pwd)/$(DEPLOY_METHOD)/chatQnA_authN_fakejwt.yaml -n chatqa
kubectl apply -f $(pwd)/$DEPLOY_METHOD/chatQnA_authN_fakejwt.yaml -n chatqa
```

After applying these two yaml files, we have setup the policy that only user with a valid JWT token (with valid issuer and claims) could access the pipeline endpoint.
@@ -151,14 +151,14 @@ Use the commands to apply the authentication and authorization rules.
```bash
# export the router service through istio ingress gateway
kubectl apply -f $(pwd)/$(DEPLOY_METHOD)/chatQnA_router_gateway.yaml
kubectl apply -f $(pwd)/$DEPLOY_METHOD/chatQnA_router_gateway.yaml
# 'envsubst' is used to substitute envs in yaml.
# use 'sudo apt-get install gettext-base' to install envsubst if it does not exist on your machine
# apply the authentication and authorization rule
# these files will restrict user access with valid token (with valid issuer, username and realm role)
envsubst < $(pwd)/$(DEPLOY_METHOD)/chatQnA_authN_keycloak.yaml | kubectl -n chatqa apply -f -
envsubst < $(pwd)/$(DEPLOY_METHOD)/chatQnA_authZ_keycloak.yaml | kubectl -n chatqa apply -f -
envsubst < $(pwd)/$DEPLOY_METHOD/chatQnA_authN_keycloak.yaml | kubectl -n chatqa apply -f -
envsubst < $(pwd)/$DEPLOY_METHOD/chatQnA_authZ_keycloak.yaml | kubectl -n chatqa apply -f -
```
User could customize the chatQnA_authZ_keycloak.yaml to reflect roles, groups or any other claims they defined in the OIDC provider for the user.
@@ -261,6 +261,7 @@ export CLIENT_SECRET=<YOUR_CLIENT_SECRET>
# Using bash here. More methods found here:
# https://oauth2-proxy.github.io/oauth2-proxy/configuration/overview#generating-a-cookie-secret
export COOKIE_SECRET=$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64 | tr -d -- '\n' | tr -- '+/' '-_' ; echo)
kubectl create ns oauth2-proxy
envsubst < $(pwd)/oauth2_install.yaml | kubectl apply -f -
```
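The `COOKIE_SECRET` recipe in the block above produces 32 random bytes in URL-safe base64, which matches the cookie-secret shape the oauth2-proxy documentation suggests. A quick sanity check of that shape (a sketch for illustration, not part of the commit):

```shell
# Regenerate a cookie secret the same way as above, then check its shape:
# 32 random bytes encode to exactly 44 base64 characters, and the second
# tr call makes the result URL-safe ('+' -> '-', '/' -> '_').
COOKIE_SECRET=$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64 | tr -d -- '\n' | tr -- '+/' '-_')
echo "length: ${#COOKIE_SECRET}"   # prints "length: 44"
case "$COOKIE_SECRET" in
  *[+/]*) echo "not URL-safe" ;;
  *)      echo "URL-safe" ;;
esac
```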
@@ -270,7 +271,7 @@ Here we expose the chatQnA endpoint through the ingress gateway and then install
```bash
# expose chatqna endpoint
kubectl apply -f $(pwd)/$(DEPLOY_METHOD)/chatQnA_router_gateway_oauth.yaml
kubectl apply -f $(pwd)/$DEPLOY_METHOD/chatQnA_router_gateway_oauth.yaml
# build chatqna UI image if not exist on your machine
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker/ui/
@@ -280,7 +281,11 @@ docker save -o ui.tar opea/chatqna-conversation-ui:latest
sudo ctr -n k8s.io image import ui.tar
# install chatqna conversation UI
cd && cd GenAIInfra
helm install chatqna-ui $(pwd)/helm-charts/common/chatqna-ui --set BACKEND_SERVICE_ENDPOINT="http://${INGRESS_HOST}:${INGRESS_PORT}/",DATAPREP_SERVICE_ENDPOINT="http://${INGRESS_HOST}:${INGRESS_PORT}/dataprep"
if [ "${DEPLOY_METHOD}" = "gmc-based" ]; then
helm install chatqna-ui $(pwd)/helm-charts/common/chatqna-ui --set BACKEND_SERVICE_ENDPOINT="http://chatqna-service.com:${INGRESS_PORT}/",DATAPREP_SERVICE_ENDPOINT="http://chatqna-service.com:${INGRESS_PORT}/dataprep"
else
helm install chatqna-ui $(pwd)/helm-charts/common/chatqna-ui --set BACKEND_SERVICE_ENDPOINT="http://chatqna-service.com:${INGRESS_PORT}/v1/chatqna",DATAPREP_SERVICE_ENDPOINT="http://chatqna-service.com:${INGRESS_PORT}/v1/dataprep"
fi
# expose ui service outside
kubectl apply -f $(pwd)/chatQnA_ui_gateway.yaml
```
@@ -292,6 +297,7 @@ Here we apply the authentication and authorization rules.
```bash
# Before applying the authorization rule, need to add the oauth2-proxy as the external authorization provider
kubectl apply -f $(pwd)/chatQnA_istio_external_auth.yaml
kubectl rollout restart deployment/istiod -n istio-system
# 'envsubst' is used to substitute envs in yaml.
# use 'sudo apt-get install gettext-base' to install envsubst if it does not exist on your machine
# apply the authentication and authorization rule
2 changes: 1 addition & 1 deletion authN-authZ/auth-istio/chatQnA_ui_gateway.yaml
@@ -42,6 +42,6 @@ spec:
prefix: /
route:
- destination:
host: ui.default.svc.cluster.local
host: chatqna-ui.default.svc.cluster.local
port:
number: 5174
@@ -31,9 +31,17 @@ spec:
http:
- match:
- uri:
prefix: /
prefix: /v1/chatqna
route:
- destination:
host: chatqna.chatqa.svc.cluster.local
port:
number: 8888
- match:
- uri:
prefix: /v1/dataprep
route:
- destination:
host: chatqna-data-prep.chatqa.svc.cluster.local
port:
number: 6007
47 changes: 44 additions & 3 deletions authN-authZ/auth-istio/oauth2_install.yaml
@@ -33,7 +33,6 @@ data:
# Redirect url
redirect_url="http://chatqna-ui.com:${INGRESS_PORT}/oauth2/callback"
#extra attributes
pass_host_header = true
reverse_proxy = true
auth_logging = true
cookie_httponly = true
@@ -42,6 +41,8 @@ data:
email_domains = "*"
pass_access_token = true
pass_authorization_header = true
pass_basic_auth = true
pass_user_headers = true
request_logging = true
set_authorization_header = true
set_xauthrequest = true
@@ -58,6 +59,46 @@ metadata:
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: redis
namespace: oauth2-proxy
spec:
selector:
matchLabels:
app: redis
replicas: 1
template:
metadata:
labels:
app: redis
spec:
containers:
- name: redis
image: redis:latest
ports:
- containerPort: 6379
resources:
limits:
cpu: "0.5"
memory: "512Mi"
command: ["redis-server"]
args: ["--save", "", "--appendonly", "no"]
---
apiVersion: v1
kind: Service
metadata:
name: redis-service
namespace: oauth2-proxy
spec:
selector:
app: redis
ports:
- protocol: TCP
port: 6379
targetPort: 6379
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: oauth2-proxy
@@ -94,7 +135,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: oauth-proxy
app: oauth2-proxy
name: oauth-proxy
namespace: oauth2-proxy
spec:
@@ -104,7 +145,7 @@ spec:
protocol: TCP
targetPort: 4180
selector:
app: oauth-proxy
app: oauth2-proxy
sessionAffinity: None
type: ClusterIP
status:
9 changes: 9 additions & 0 deletions helm-charts/chatqna/Chart.yaml
@@ -6,6 +6,15 @@ name: chatqna
description: The Helm chart to deploy ChatQnA
type: application
dependencies:
- name: tgi
version: 1.0.0
alias: tgi-guardrails
repository: "file://../common/tgi"
condition: guardrails-usvc.enabled
- name: guardrails-usvc
version: 1.0.0
repository: "file://../common/guardrails-usvc"
condition: guardrails-usvc.enabled
- name: tgi
version: 1.0.0
repository: "file://../common/tgi"
4 changes: 4 additions & 0 deletions helm-charts/chatqna/README.md
@@ -28,6 +28,10 @@ helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
# To use Nvidia GPU
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
# To include guardrail component in chatqna on Xeon
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
# To include guardrail component in chatqna on Gaudi
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
```

### IMPORTANT NOTE
46 changes: 46 additions & 0 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

image:
repository: opea/chatqna-guardrails
tag: "latest"

# guardrails related config
guardrails-usvc:
enabled: true
SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"

# gaudi related config
tei:
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: synapse_1.16
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false

tgi:
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.1"
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""

tgi-guardrails:
LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.1"
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
14 changes: 14 additions & 0 deletions helm-charts/chatqna/guardrails-values.yaml
@@ -0,0 +1,14 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

image:
repository: opea/chatqna-guardrails
tag: "latest"

# guardrails related config
guardrails-usvc:
enabled: true
SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
tgi-guardrails:
LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"