Add UDS bundles and README Updates (#322)
* add uds bundles for local package deployment
* update README and Makefile targets
* add uds latest targets and move dir
* add local dev instructions to README.md
* update e2e test workflow
jalling97 authored Apr 8, 2024
1 parent a5d98b2 commit 2bf1974
Showing 15 changed files with 478 additions and 26 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/e2e.yaml
@@ -62,8 +62,8 @@ jobs:
run: |
make build-api LOCAL_VERSION=e2e-test
docker image prune -af
-uds zarf package deploy zarf-package-leapfrogai-api-amd64-e2e-test.tar.zst --confirm
-rm zarf-package-leapfrogai-api-amd64-e2e-test.tar.zst
+uds zarf package deploy packages/api/zarf-package-leapfrogai-api-amd64-e2e-test.tar.zst --confirm
+rm packages/api/zarf-package-leapfrogai-api-amd64-e2e-test.tar.zst
##########
# llama
@@ -72,8 +72,8 @@
run: |
make build-llama-cpp-python LOCAL_VERSION=e2e-test
docker image prune -af
-uds zarf package deploy zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst -l=trace --confirm
-rm zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst
+uds zarf package deploy packages/llama-cpp-python/zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst -l=trace --confirm
+rm packages/llama-cpp-python/zarf-package-llama-cpp-python-amd64-e2e-test.tar.zst
- name: Test llama-cpp-python
run: |
@@ -90,8 +90,8 @@
run: |
make build-text-embeddings LOCAL_VERSION=e2e-test
docker image prune -af
-uds zarf package deploy zarf-package-text-embeddings-amd64-e2e-test.tar.zst -l=trace --confirm
-rm zarf-package-text-embeddings-amd64-e2e-test.tar.zst
+uds zarf package deploy packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst -l=trace --confirm
+rm packages/text-embeddings/zarf-package-text-embeddings-amd64-e2e-test.tar.zst
- name: Test text-embeddings
run: |
@@ -108,8 +108,8 @@
run: |
make build-whisper LOCAL_VERSION=e2e-test
docker image prune -af
-uds zarf package deploy zarf-package-whisper-amd64-e2e-test.tar.zst -l=trace --confirm
-rm zarf-package-whisper-amd64-e2e-test.tar.zst
+uds zarf package deploy packages/whisper/zarf-package-whisper-amd64-e2e-test.tar.zst -l=trace --confirm
+rm packages/whisper/zarf-package-whisper-amd64-e2e-test.tar.zst
- name: Test whisper
run: |
1 change: 1 addition & 0 deletions .gitignore
@@ -28,6 +28,7 @@ tokenizer.json
config.json
generation_config.json
vocabulary.json
+.model/

# go binaries
api/main
25 changes: 15 additions & 10 deletions Makefile
@@ -3,7 +3,6 @@ KEY ?= ""

VERSION ?= $(shell git describe --abbrev=0 --tags)
LOCAL_VERSION ?= $(shell git rev-parse --short HEAD)

######################################################################################

.PHONY: help
@@ -49,14 +48,14 @@ build-api: local-registry setup-api-deps ## Build the leapfrogai_api container and Zarf package
docker push localhost:5000/defenseunicorns/leapfrogai/leapfrogai-api:${LOCAL_VERSION}

## Build the Zarf package
-uds zarf package create packages/api --registry-override=ghcr.io=localhost:5000 --insecure --set LEAPFROGAI_IMAGE_VERSION=${LOCAL_VERSION} --confirm
+uds zarf package create packages/api -o packages/api --registry-override=ghcr.io=localhost:5000 --insecure --set LEAPFROGAI_IMAGE_VERSION=${LOCAL_VERSION} --confirm


-setup-llama-deps: ## Download the wheels for the optional 'llama' dependencies
-	-rm packages/llama/build/*.whl
+setup-llama-cpp-python-deps: ## Download the wheels for the optional 'llama-cpp-python' dependencies
+	-rm packages/llama-cpp-python/build/*.whl
python -m pip wheel ".[llama-cpp-python]" -w packages/llama-cpp-python/build

-build-llama-cpp-python: local-registry setup-llama-deps ## Build the llama-cpp-python (cpu) container and Zarf package
+build-llama-cpp-python: local-registry setup-llama-cpp-python-deps ## Build the llama-cpp-python (cpu) container and Zarf package
## Build the image (and tag it for the local registry)
docker build -t ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${LOCAL_VERSION} packages/llama-cpp-python
docker tag ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:${LOCAL_VERSION} localhost:5000/defenseunicorns/leapfrogai/llama-cpp-python:${LOCAL_VERSION}
@@ -65,14 +64,14 @@ build-llama-cpp-python: local-registry setup-llama-deps ## Build the llama-cpp-python (cpu) container and Zarf package
docker push localhost:5000/defenseunicorns/leapfrogai/llama-cpp-python:${LOCAL_VERSION}

## Build the Zarf package
-uds zarf package create packages/llama-cpp-python --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm
+uds zarf package create packages/llama-cpp-python -o packages/llama-cpp-python --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm


setup-vllm-deps: ## Download the wheels for the optional 'vllm' dependencies
-rm packages/vllm/build/*.whl
python -m pip wheel ".[vllm]" -w packages/vllm/build

-build-vllm: local-registry setup-vllm-deps
+build-vllm: local-registry setup-vllm-deps ## Build the vllm container and Zarf package
## Build the image (and tag it for the local registry)
docker build -t ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} packages/vllm
docker tag ghcr.io/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION} localhost:5000/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION}
@@ -81,7 +80,7 @@ build-vllm: local-registry setup-vllm-deps
docker push localhost:5000/defenseunicorns/leapfrogai/vllm:${LOCAL_VERSION}

## Build the Zarf package
-uds zarf package create packages/vllm --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm
+uds zarf package create packages/vllm -o packages/vllm --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm


setup-text-embeddings-deps: ## Download the wheels for the optional 'text-embeddings' dependencies
@@ -97,7 +96,7 @@ build-text-embeddings: local-registry setup-text-embeddings-deps ## Build the text-embeddings container and Zarf package
docker push localhost:5000/defenseunicorns/leapfrogai/text-embeddings:${LOCAL_VERSION}

## Build the Zarf package
-uds zarf package create packages/text-embeddings --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm
+uds zarf package create packages/text-embeddings -o packages/text-embeddings --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm


setup-whisper-deps: ## Download the wheels for the optional 'whisper' dependencies
@@ -113,4 +112,10 @@ build-whisper: local-registry setup-whisper-deps ## Build the whisper container and Zarf package
docker push localhost:5000/defenseunicorns/leapfrogai/whisper:${LOCAL_VERSION}

## Build the Zarf package
-uds zarf package create packages/whisper --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm
+uds zarf package create packages/whisper -o packages/whisper --registry-override=ghcr.io=localhost:5000 --insecure --set IMAGE_VERSION=${LOCAL_VERSION} --confirm

+build-cpu: build-api build-llama-cpp-python build-text-embeddings build-whisper ## Build all zarf packages for a cpu-enabled deployment of LFAI

+build-gpu: build-api build-vllm build-text-embeddings build-whisper ## Build all zarf packages for a gpu-enabled deployment of LFAI

+build-all: build-api build-llama-cpp-python build-vllm build-text-embeddings build-whisper ## Build all of the LFAI packages
150 changes: 149 additions & 1 deletion README.md
@@ -6,13 +6,18 @@
- [Table of Contents](#table-of-contents)
- [Overview](#overview)
- [Why Host Your Own LLM?](#why-host-your-own-llm)
- [Structure](#structure)
- [Getting Started](#getting-started)
- [Components](#components)
- [API](#api)
- [Backends](#backends)
- [Image Hardening](#image-hardening)
- [SDK](#sdk)
- [User Interface](#user-interface)
- [Usage](#usage)
- [UDS (Latest)](#uds-latest)
- [UDS (Dev)](#uds-dev)
- [Local Dev](#local-dev)
- [Community](#community)

## Overview
@@ -31,9 +36,34 @@ Large Language Models (LLMs) are a powerful resource for AI-driven decision making

- **Mission Integration**: By hosting your own LLM, you have the ability to customize the model's parameters, training data, and more, tailoring the AI to your specific needs.

## Structure

The LeapfrogAI repository follows a monorepo structure based around an [API](#api), with each of the [components](#components) in a dedicated `packages` directory. Each package directory contains the source code for its component as well as its deployment infrastructure. The UDS bundles that drive the dev and latest deployments of LeapfrogAI live in the `uds-bundles` directory. The structure looks as follows:

```
leapfrogai/
├── src/
│   └── leapfrogai_api/
│       ├── main.py
│       └── ...
├── packages/
│   ├── api/
│   ├── llama-cpp-python/
│   ├── text-embeddings/
│   ├── vllm/
│   └── whisper/
├── uds-bundles/
│   ├── dev/
│   └── latest/
├── Makefile
├── pyproject.toml
├── README.md
└── ...
```

## Getting Started

-The preferred method for running LeapfrogAI is a local [Kubernetes](https://kubernetes.io/) deployment using [UDS](https://github.com/defenseunicorns/uds-core). Simple instructions for this type of deployment can be found on the [LeapfrogAI Documentation Site](https://docs.leapfrog.ai/docs/).
+The preferred method for running LeapfrogAI is a local [Kubernetes](https://kubernetes.io/) deployment using [UDS](https://github.com/defenseunicorns/uds-core). Refer to the [Quick Start](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/) section of the LeapfrogAI documentation site for instructions on this type of deployment.

## Components

Expand Down Expand Up @@ -74,6 +104,124 @@ The LeapfrogAI SDK provides a standard set of protobuff and python utilities for
LeapfrogAI provides several UI options to get started with common use cases such as chat, summarization, and transcription.

## Usage

### UDS (Latest)

LeapfrogAI can be deployed and run locally via UDS and Kubernetes, built out using [Zarf](https://zarf.dev) packages. This pulls the most recent package images and is the most stable way of running a local LeapfrogAI deployment. These instructions can be found on the [LeapfrogAI Docs](https://docs.leapfrog.ai/docs/) site.
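
As a concrete sketch (assuming the `latest` bundles under `uds-bundles/latest/` follow the same layout as the `dev` bundles shown below), a CPU deployment of the latest release would look like:

```
cd uds-bundles/latest/cpu
uds create .
uds deploy k3d-core-slim-dev:0.18.0
uds deploy uds-bundle-leapfrogai*.tar.zst --confirm
```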

### UDS (Dev)

If you want to make some changes to LeapfrogAI before deploying via UDS (for example in a dev environment), you can follow these instructions:

Make sure your system has the [required dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/#prerequisites).

To keep dependencies isolated, it's best to create a virtual environment:
```
python -m venv .venv
source .venv/bin/activate
```

Each component is built into its own Zarf package. This can be done easily using the provided Makefile targets:
```
make build-api
make build-vllm # if you have GPUs
make build-llama-cpp-python # if you have CPU only
make build-text-embeddings
make build-whisper
```
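
Each build target tags its image and package with `LOCAL_VERSION`, which defaults to the short git SHA (`git rev-parse --short HEAD` in the Makefile); you can also pin it explicitly, as the e2e workflow does:

```
make build-api LOCAL_VERSION=e2e-test
```
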
**OR**

You can build all of the packages you need at once with the following make targets:

```
make build-cpu # api, llama-cpp-python, text-embeddings, whisper
make build-gpu # api, vllm, text-embeddings, whisper
make build-all # all of the backends
```
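
Since each `uds zarf package create` call now passes `-o packages/<component>`, the built archives land next to their source. A quick way to confirm what was built:

```
ls packages/*/zarf-package-*.tar.zst
```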

Once the packages are created, you can deploy either a CPU or GPU-enabled deployment via one of the UDS bundles:

#### CPU
```
cd uds-bundles/dev/cpu
uds create .
uds deploy k3d-core-slim-dev:0.18.0
uds deploy uds-bundle-leapfrogai*.tar.zst
```

#### GPU
```
cd uds-bundles/dev/gpu
uds create .
uds deploy k3d-core-slim-dev:0.18.0 --set K3D_EXTRA_ARGS="--gpus=all --image=ghcr.io/justinthelaw/k3d-gpu-support:v1.27.4-k3s1-cuda" # be sure to check if a newer version exists
uds deploy uds-bundle-leapfrogai-*.tar.zst --confirm
```
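
To sanity-check either deployment, you can watch the pods come up using the `kubectl` vendored by the UDS CLI (the `leapfrogai` namespace here is an assumption; adjust it to your cluster):

```
uds zarf tools kubectl get pods -n leapfrogai
```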

### Local Dev

The following instructions are for running each of the LFAI components locally during development. This is useful for testing changes to a specific component, but it will not stand up a full LeapfrogAI deployment; refer to the sections above for deployment instructions.

It is highly recommended to create a virtual environment to keep the development environment clean:

```
python -m venv .venv
source .venv/bin/activate
```

#### API

To run the LeapfrogAI API locally (starting from the root directory of the repository):

```
python -m pip install ".[api,dev]"
cd src
uvicorn leapfrogai_api.main:app --port 3000 --reload
```
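
Once `uvicorn` is running, a quick smoke test is to hit the interactive docs that FastAPI serves by default (assuming the default docs route is enabled):

```
curl http://localhost:3000/docs
```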

#### Backend: llama-cpp-python

To run the llama-cpp-python backend locally (starting from the root directory of the repository):

```
python -m pip install ".[llama-cpp-python,dev]"
cd packages/llama-cpp-python
python scripts/model_download.py
mv .model/*.gguf .model/model.gguf
python -m leapfrogai_api.types.cli --app-dir=. main:Model
```

#### Backend: text-embeddings

To run the text-embeddings backend locally (starting from the root directory of the repository):

```
python -m pip install ".[text-embeddings,dev]"
cd packages/text-embeddings
python scripts/model_download.py
python -u main.py
```

#### Backend: vllm

To run the vllm backend locally (starting from the root directory of the repository):

```
python -m pip install ".[vllm,dev]"
cd packages/vllm
python scripts/model_download.py
export QUANTIZATION=awq
python -m leapfrogai_api.types.cli --app-dir=. main:Model
```

#### Backend: whisper

To run the whisper backend locally (starting from the root directory of the repository):

```
python -m pip install ".[whisper,dev]"
cd packages/whisper
ct2-transformers-converter --model openai/whisper-base --output_dir .model --copy_files tokenizer.json --quantization float32
python -u main.py
```
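
The `ct2-transformers-converter` CLI used above ships with `ctranslate2`, which this commit adds to the `whisper` extra in `pyproject.toml`, so the `pip install` step pulls it in. The conversion writes the CTranslate2 model into `.model/`; a quick check:

```
ls .model   # expect model.bin and the copied tokenizer.json, among other files
```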

## Community

LeapfrogAI is supported by a community of users and contributors, including:
1 change: 0 additions & 1 deletion packages/vllm/Dockerfile
@@ -38,7 +38,6 @@ COPY build/*.whl build/
COPY build/leapfrogai_api*.whl leapfrogai_api-100.100.100-py3-none-any.whl
RUN pip install "leapfrogai_api-100.100.100-py3-none-any.whl[vllm]" --no-index --find-links=build/


# download model
ARG REPO_ID=TheBloke/Synthia-7B-v2.0-AWQ
ARG REVISION=main
1 change: 1 addition & 0 deletions pyproject.toml
@@ -71,6 +71,7 @@ text-embeddings = [

whisper = [
"faster-whisper == 0.10.0",
"ctranslate2 >= 4.1.0"
]

e2e-test = [
39 changes: 39 additions & 0 deletions uds-bundles/dev/cpu/uds-bundle.yaml
@@ -0,0 +1,39 @@
kind: UDSBundle
metadata:
  name: leapfrogai
  description: A UDS bundle for deploying LeapfrogAI with CPU-only support
  version: dev

packages:
  # API
  - name: leapfrogai-api
    path: ../../../packages/api/
    ref: dev

  # Legacy UI - not currently in this repo so it builds from ghcr
  - name: leapfrogai-ui
    repository: ghcr.io/defenseunicorns/packages/leapfrogai/leapfrogai-ui
    ref: 0.3.5

  # Chat Model
  - name: llama-cpp-python
    path: ../../../packages/llama-cpp-python/
    ref: dev

  # Text Embeddings Model
  - name: text-embeddings
    path: ../../../packages/text-embeddings/
    ref: dev

  # RAG Backend - not currently in this repo so it builds from ghcr
  - name: rag
    repository: ghcr.io/defenseunicorns/packages/leapfrogai/rag
    ref: 0.3.1

  # Transcription Model
  - name: whisper
    path: ../../../packages/whisper/
    ref: dev



26 changes: 26 additions & 0 deletions uds-bundles/dev/cpu/uds-config.yaml
@@ -0,0 +1,26 @@
variables:
  leapfrogai-ui:
    domain: https://ai.uds.dev
    model: llama-cpp-python
    concurrent_requests: "false"
    ai4ns_branding: "false"
    leapfrogai_rag_url: http://rag.leapfrogai.svc.cluster.local:8000
    max_tokens: 8192

  text-embeddings:
    gpu_limit: 0

  whisper:
    gpu_limit: 0

  rag:
    model: llama-cpp-python
    ssl_verification: "false" # if certs exist in-cluster, make true
    response_mode: "raw" # default mode for query endpoint
    temperature: 0 # refine method temperature for vllm
    max_output: 2048
    context_window: 4096
    chunk_size: 512
    overlap_size: 64
    embedding_model_name: text-embeddings
    top_k: 20