Skip to content

Commit

Permalink
Merge branch 'main' into log
Browse files Browse the repository at this point in the history
  • Loading branch information
KfreeZ authored Sep 10, 2024
2 parents 7e8767b + 267d828 commit 5f606a9
Show file tree
Hide file tree
Showing 189 changed files with 31,832 additions and 312 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/manual-helm-cd-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
find "$CHARTS_DIR" -mindepth 1 -maxdepth 1 -type d ! -name "common" -exec basename {} \; | while read -r subfolder; do
for file in "$CHARTS_DIR/$subfolder"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -60,7 +60,7 @@ jobs:
find "$CHARTS_DIR/common" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | while read -r subfolder; do
for file in "$CHARTS_DIR/common/$subfolder"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -77,7 +77,7 @@ jobs:
for service in $service_list; do
for file in "$CHARTS_DIR/$service"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr-chart-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
for chart in ${e2e_charts}; do
for file in "$CHARTS_DIR/$chart"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand All @@ -64,7 +64,7 @@ jobs:
for chart in ${common_charts}; do
for file in "$CHARTS_DIR/common/$chart"/*values.yaml; do
if [ -f "$file" ]; then
if [[ "$file" == *"nv-values.yaml" ]]; then
if [[ "$file" == *"nv-values.yaml" ]] || [[ "$file" == *"hpa-values.yaml" ]]; then
continue
fi
filename=$(basename "$file" .yaml)
Expand Down
45 changes: 40 additions & 5 deletions .github/workflows/pr-go-unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- microservices-connector/**
- kubernetes-addons/memory-bandwidth-exporter/**
- "!microservices-connector/helm/**"
- "!**.md"
- "!**.txt"
Expand All @@ -21,11 +22,45 @@ concurrency:
cancel-in-progress: true

env:
GOSRC_DIR: "microservices-connector"
GMC_DIR: "microservices-connector"
MBE_DIR: "kubernetes-addons/memory-bandwidth-exporter"

jobs:
job1:
name: Get-test-matrix
runs-on: ubuntu-latest
outputs:
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
steps:
- name: Check out Repo
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get test matrix
id: get-test-matrix
run: |
set -xe
base_commit=${{ github.event.pull_request.base.sha }}
merged_commit=${{ github.event.pull_request.head.sha }}
run_matrix="["
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${GMC_DIR}/"; then
run_matrix="${run_matrix}\"${GMC_DIR}\","
fi
if git diff --name-only ${base_commit} ${merged_commit} | grep -q "^${MBE_DIR}/"; then
run_matrix="${run_matrix}\"${MBE_DIR}\","
fi
run_matrix=${run_matrix%,}"]"
echo "run_matrix=${run_matrix}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
go-unittests:
needs: [job1]
runs-on: ubuntu-latest
if: ${{ always() && needs.job1.outputs.run_matrix != '[]' }}
strategy:
matrix:
gopath: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
permissions:
contents: write
actions: write
Expand All @@ -47,15 +82,15 @@ jobs:
with:
args: --timeout=10m
version: v1.55.2
working-directory: ${{ env.GOSRC_DIR }}
working-directory: ${{ matrix.gopath }}

- name: Run golangunit test
run: |
cd $GOSRC_DIR
cd ${{ matrix.gopath }}
make test
- name: Run tests and generate coverage
run: |
cd $GOSRC_DIR
cd ${{ matrix.gopath }}
go test -coverprofile=coverage.out $(go list ./... | grep -v /e2e)
../.github/workflows/scripts/go-coverage.sh
${{ github.workspace }}/.github/workflows/scripts/go-coverage.sh
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
*.out
**/Chart.lock
**/charts/*.tgz

bazel-*
compile_commands.json
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,31 @@ The GenAIInfra repository is organized under four main directories, which includ

## Prerequisite

GenAIInfra uses Kubernetes as the cloud native infrastructure. Please follow the steps below to prepare the Kubernetes environment.
GenAIInfra uses Kubernetes as the cloud native infrastructure. Follow the steps below to prepare the Kubernetes environment.

### Setup Kubernetes cluster

Please follow [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to setup Kubernetes. We recommend to use Kubernetes with version >= 1.27.
Follow the [Kubernetes official setup guide](https://kubernetes.io/docs/setup/) to set up Kubernetes. We recommend using Kubernetes version >= 1.27.

There are different methods to setup Kubernetes production cluster, such as [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), [kubespray](https://kubespray.io/), and [more](https://kubernetes.io/docs/setup/production-environment/tools/).

NOTE: We recommend using containerd as the container runtime during Kubernetes setup. Docker engine is also verified on Ubuntu 22.04 and above.

### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product:
### (Optional) To run GenAIInfra on [Intel Gaudi](https://habana.ai/products/) product

The following steps are optional. They're only required if you want to run the workloads on Intel Gaudi product.

1. Please check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that environment meets the requirements.
1. Check the [support matrix](https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html) to make sure that the environment meets the requirements.

2. [Install Intel Gaudi software stack](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#driver-fw-install-bare).

3. [Install and setup container runtime](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#set-up-container-usage), based on the container runtime used by Kubernetes.

NOTE: Please make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.
NOTE: Make sure you configure the appropriate container runtime based on the type of container runtime you installed during Kubernetes setup.

4. [Install Intel Gaudi device plugin for Kubernetes](https://docs.habana.ai/en/latest/Orchestration/Gaudi_Kubernetes/Device_Plugin_for_Kubernetes.html).

Alternatively, Intel provides a base operator to manage the Gaudi software stack. Please refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.
Alternatively, Intel provides a base operator to manage the Gaudi software stack. Refer to [this file](kubernetes-addons/Intel-Gaudi-Base-Operator/README.md) for details.

## Usages

Expand Down
13 changes: 13 additions & 0 deletions helm-charts/chatqna/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ name: chatqna
description: The Helm chart to deploy ChatQnA
type: application
dependencies:
- name: tgi
version: 1.0.0
alias: tgi-guardrails
repository: "file://../common/tgi"
condition: guardrails-usvc.enabled
- name: guardrails-usvc
version: 1.0.0
repository: "file://../common/guardrails-usvc"
condition: guardrails-usvc.enabled
- name: tgi
version: 1.0.0
repository: "file://../common/tgi"
Expand Down Expand Up @@ -33,5 +42,9 @@ dependencies:
- name: data-prep
version: 1.0.0
repository: "file://../common/data-prep"
- name: ui
alias: chatqna-ui
version: 1.0.0
repository: "file://../common/ui"
version: 1.0.0
appVersion: "v1.0"
32 changes: 10 additions & 22 deletions helm-charts/chatqna/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ helm dependency update chatqna
export HFTOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
# If you would like to use the traditional UI, please change the image as well as the containerport within the values
# append these at the end of the command "--set chatqna-ui.image.repository=opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173"
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
# To use Gaudi device
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
# To use Nvidia GPU
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
# To include guardrail component in chatqna on Xeon
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
# To include guardrail component in chatqna on Gaudi
#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
```

### IMPORTANT NOTE
Expand All @@ -54,32 +60,14 @@ curl http://localhost:8888/v1/chatqna \

### Verify the workload through UI

UI need to get installed before accessing. Follow the steps below to build and install UI:
The UI has already been installed via the Helm chart. To access it, use the external IP of one of your Kubernetes nodes along with the NGINX port. You can find the NGINX port using the following command:

```bash
# expose endpoint of ChatQnA service and dataprep service
kubectl port-forward svc/chatqna --address 0.0.0.0 8888:8888
kubectl port-forward svc/chatqna-data-prep --address 0.0.0.0 6007:6007

# build and push the UI image if not exist
# skip these steps if the image already exists
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker/ui/
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
# push the image to your cluster, make sure the image exists on each node of your cluster
docker save -o ui.tar opea/chatqna-conversation-ui:latest
sudo ctr -n k8s.io image import ui.tar

# install UI using helm chart. Replace image tag if required
cd
cd GenAIInfra/helm-charts/
helm install ui common/chatqna-ui --set BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna",DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep",image.tag="latest"

# expose the endpoint of UI for verification
kubectl port-forward svc/ui --address 0.0.0.0 5174:5174
export port=$(kubectl get service chatqna-nginx --output='jsonpath={.spec.ports[0].nodePort}')
echo $port
```

Access `http://localhost:5174` to play with the ChatQnA workload through UI.
Open a browser to access `http://<k8s-node-ip-address>:${port}` to play with the ChatQnA workload.

## Values

Expand Down
46 changes: 46 additions & 0 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Helm values overlay for the chatqna chart: enables the guardrails pipeline
# and targets Intel Gaudi accelerators.
# Usage (per chart README): helm install chatqna chatqna ... -f chatqna/guardrails-gaudi-values.yaml

# Guardrails-enabled ChatQnA megaservice image.
image:
  repository: opea/chatqna-guardrails
  tag: "latest"

# guardrails related config
guardrails-usvc:
  # Also pulls in the tgi-guardrails dependency — both are gated on the
  # guardrails-usvc.enabled condition in Chart.yaml.
  enabled: true
  # Endpoint of the safety-guard TGI service. "{{ .Release.Name }}" is a Helm
  # template fragment inside a value — presumably rendered via tpl by the
  # chart templates; TODO confirm.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"

# gaudi related config
tei:
  image:
    # Habana Gaudi build of text-embeddings-inference.
    repository: ghcr.io/huggingface/tei-gaudi
    tag: synapse_1.16
  resources:
    limits:
      habana.ai/gaudi: 1  # one Gaudi accelerator per pod
  securityContext:
    readOnlyRootFilesystem: false

tgi:
  image:
    # Habana Gaudi build of text-generation-inference.
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.1"
  resources:
    limits:
      habana.ai/gaudi: 1
  # Quoted so they stay strings when passed through as env values.
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  CUDA_GRAPHS: ""  # empty string — presumably disables CUDA graphs on Gaudi; confirm against tgi-gaudi docs

# Dedicated TGI instance serving the Llama Guard safety model
# (aliased tgi-guardrails in Chart.yaml).
tgi-guardrails:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.1"
  resources:
    limits:
      habana.ai/gaudi: 1
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  CUDA_GRAPHS: ""
14 changes: 14 additions & 0 deletions helm-charts/chatqna/guardrails-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Helm values overlay for the chatqna chart: enables the guardrails pipeline
# on CPU (Xeon).
# Usage (per chart README): helm install chatqna chatqna ... -f chatqna/guardrails-values.yaml

# Guardrails-enabled ChatQnA megaservice image.
image:
  repository: opea/chatqna-guardrails
  tag: "latest"

# guardrails related config
guardrails-usvc:
  # Also pulls in the tgi-guardrails dependency — both are gated on the
  # guardrails-usvc.enabled condition in Chart.yaml.
  enabled: true
  # "{{ .Release.Name }}" is a Helm template fragment inside a value —
  # presumably rendered via tpl by the chart templates; TODO confirm.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
# Dedicated TGI instance serving the Llama Guard safety model
# (aliased tgi-guardrails in Chart.yaml).
tgi-guardrails:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
31 changes: 15 additions & 16 deletions helm-charts/chatqna/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ metadata:
name: {{ include "chatqna.fullname" . }}
labels:
{{- include "chatqna.labels" . | nindent 4 }}
app: {{ include "chatqna.fullname" . }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "chatqna.selectorLabels" . | nindent 6 }}
app: {{ include "chatqna.fullname" . }}
template:
metadata:
{{- with .Values.podAnnotations }}
Expand All @@ -20,6 +22,7 @@ spec:
{{- end }}
labels:
{{- include "chatqna.selectorLabels" . | nindent 8 }}
app: {{ include "chatqna.fullname" . }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand All @@ -38,6 +41,8 @@ spec:
value: {{ .Release.Name }}-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: {{ .Release.Name }}-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: {{ .Release.Name }}-guardrails-usvc
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand All @@ -49,22 +54,6 @@ spec:
- name: chatqna
containerPort: {{ .Values.port }}
protocol: TCP
# startupProbe:
# httpGet:
# host: {{ .Release.Name }}-llm-uservice
# port: {{ index .Values "llm-uservice" "service" "port" }}
# path: /
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: {{ .Values.port }}
# readinessProbe:
# httpGet:
# path: /
# port: {{ .Values.port }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
Expand All @@ -82,3 +71,13 @@ spec:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.evenly_distributed }}
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
{{- include "chatqna.selectorLabels" . | nindent 14 }}
app: {{ include "chatqna.fullname" . }}
{{- end }}
Loading

0 comments on commit 5f606a9

Please sign in to comment.