diff --git a/README.md b/README.md
index d4c8460b..08bbc1d5 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,17 @@ client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/b
   docker run -it --gpus all -p 9090:9090 ghcr.io/collabora/whisperlive-gpu:latest
   ```
 
- - TensorRT. Follow [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md) in order to setup docker and use TensorRT backend. We provide a pre-built docker image which has TensorRT-LLM built and ready to use.
+ - TensorRT. Follow the [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md).
+ ```
+ mkdir docker/scratch-space
+ cp docker/scripts/build-whisper-tensorrt.sh docker/scratch-space
+ cp docker/scripts/run-whisperlive.sh docker/scratch-space
+
+ # e.g. for an RTX 3090 the CUDA architecture is 86-real
+ CUDA_ARCH=86-real docker compose build
+
+ MODEL_SIZE=small.en BACKEND=tensorrt docker compose up
+ ```
 
 - CPU
 ```bash
diff --git a/TensorRT_whisper.md b/TensorRT_whisper.md
index 1bc303f0..fa2d04f1 100644
--- a/TensorRT_whisper.md
+++ b/TensorRT_whisper.md
@@ -12,56 +12,19 @@ git clone https://github.com/collabora/WhisperLive.git
 cd WhisperLive
 ```
 
-- Pull the TensorRT-LLM docker image which we prebuilt for WhisperLive TensorRT backend.
-```bash
-docker pull ghcr.io/collabora/whisperbot-base:latest
+- Build the docker image for your GPU architecture. By default the image is built for the RTX 4090, i.e. `CUDA_ARCH=89-real;90-real`.
 ```
+mkdir docker/scratch-space
+cp docker/scripts/build-whisper-tensorrt.sh docker/scratch-space
+cp docker/scripts/run-whisperlive.sh docker/scratch-space
 
-- Next, we run the docker image and mount WhisperLive repo to the containers `/home` directory.
-```bash
-docker run -it --gpus all --shm-size=8g \
-    --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
-    -p 9090:9090 -v /path/to/WhisperLive:/home/WhisperLive \
-    ghcr.io/collabora/whisperbot-base:latest
-```
-
-- Make sure to test the installation.
-```bash
-# export ENV=${ENV:-/etc/shinit_v2}
-# source $ENV
-python -c "import torch; import tensorrt; import tensorrt_llm"
-```
-**NOTE**: Uncomment and update library paths if imports fail.
-
-## Whisper TensorRT Engine
-- We build `small.en` and `small` multilingual TensorRT engine. The script logs the path of the directory with Whisper TensorRT engine. We need the model_path to run the server.
-```bash
-# convert small.en
-bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small.en
-
-# convert small multilingual model
-bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small
+# e.g. for an RTX 3090 the CUDA architecture is 86-real
+CUDA_ARCH=86-real docker compose build
 ```
 
 ## Run WhisperLive Server with TensorRT Backend
+We run the container with docker compose, which builds the TensorRT engine for the specified model
+if it doesn't already exist in the mounted volume `docker/scratch-space`. Optionally, to run the `faster_whisper` backend instead, set `BACKEND=faster_whisper`.
 ```bash
-cd /home/WhisperLive
-
-# Install requirements
-apt update && bash scripts/setup.sh
-pip install -r requirements/server.txt
-
-# Required to create mel spectogram
-wget --directory-prefix=assets assets/mel_filters.npz https://raw.githubusercontent.com/openai/whisper/main/whisper/assets/mel_filters.npz
-
-# Run English only model
-python3 run_server.py --port 9090 \
-    --backend tensorrt \
-    --trt_model_path "path/to/whisper_trt/from/build/step"
-
-# Run Multilingual model
-python3 run_server.py --port 9090 \
-    --backend tensorrt \
-    --trt_model_path "path/to/whisper_trt/from/build/step" \
-    --trt_multilingual
+MODEL_SIZE=small.en BACKEND=tensorrt docker compose up
 ```
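The `CUDA_ARCH` values above encode the GPU's compute capability (8.6 for an RTX 3090, 8.9 for a 4090). As a sketch, assuming a driver recent enough to support the `compute_cap` query field, the right value can be derived with `nvidia-smi`:

```bash
# Print each visible GPU's compute capability (e.g. "8.6" on an RTX 3090),
# then strip the dot and append "-real" to get a CUDA_ARCH value ("86-real").
nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//; s/$/-real/'
```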
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..8d5b8a5a
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.8'
+
+services:
+  whisperlive-tensorrt:
+    build:
+      context: docker
+      dockerfile: Dockerfile.tensorrt
+      args:
+        CUDA_ARCH: ${CUDA_ARCH:-89-real;90-real}
+    image: whisperlive-tensorrt:latest
+    volumes:
+      - type: bind
+        source: ./docker/scratch-space
+        target: /root/scratch-space
+    environment:
+      VERBOSE: ${VERBOSE:-false}
+      MODEL_SIZE: ${MODEL_SIZE:-small.en}
+      BACKEND: ${BACKEND:-tensorrt}
+    ports:
+      - "8000:9090"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: ["gpu"]
+    entrypoint: ["/bin/bash", "-c", "/root/scratch-space/run-whisperlive.sh $$MODEL_SIZE $$BACKEND"]
diff --git a/docker/Dockerfile.tensorrt b/docker/Dockerfile.tensorrt
new file mode 100644
index 00000000..10406268
--- /dev/null
+++ b/docker/Dockerfile.tensorrt
@@ -0,0 +1,20 @@
+ARG BASE_IMAGE=nvcr.io/nvidia/cuda
+ARG BASE_TAG=12.2.2-devel-ubuntu22.04
+
+FROM ${BASE_IMAGE}:${BASE_TAG} as base
+ARG CUDA_ARCH
+ENV CUDA_ARCH=${CUDA_ARCH}
+
+FROM base as devel
+WORKDIR /root
+COPY scripts/install-deps.sh /root
+RUN bash install-deps.sh && rm install-deps.sh
+COPY scripts/build-trt-llm.sh /root
+RUN bash build-trt-llm.sh && rm build-trt-llm.sh
+
+FROM devel as release
+WORKDIR /root/
+COPY scripts/install-trt-llm.sh /root
+RUN bash install-trt-llm.sh && rm install-trt-llm.sh
+COPY scripts/setup-whisperlive.sh /root/
+RUN ./setup-whisperlive.sh
diff --git a/docker/scripts/build-trt-llm.sh b/docker/scripts/build-trt-llm.sh
new file mode 100644
index 00000000..88d6743d
--- /dev/null
+++ b/docker/scripts/build-trt-llm.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -e
+
+export ENV=${ENV:-/etc/shinit_v2}
+source $ENV
+
+CUDA_ARCH="${CUDA_ARCH:-89-real;90-real}"
+
+cd /root/TensorRT-LLM
+python3 scripts/build_wheel.py --clean --cuda_architectures "$CUDA_ARCH" --trt_root /usr/local/tensorrt
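Taken together, the compose file and build scripts above give a two-step workflow. For reference, a typical invocation (the architecture value is illustrative and targets an RTX 3090):

```bash
# Build the image for the target GPU architecture.
CUDA_ARCH=86-real docker compose build

# Start the server; the TensorRT engine for MODEL_SIZE is built on first
# start and cached under docker/scratch-space, so later starts skip it.
MODEL_SIZE=small.en BACKEND=tensorrt docker compose up -d

# Follow the logs; the server is reachable on host port 8000 (container 9090).
docker compose logs -f whisperlive-tensorrt
```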
diff --git a/scripts/build_whisper_tensorrt.sh b/docker/scripts/build-whisper-tensorrt.sh
old mode 100644
new mode 100755
similarity index 79%
rename from scripts/build_whisper_tensorrt.sh
rename to docker/scripts/build-whisper-tensorrt.sh
index 98248039..2e25aec4
--- a/scripts/build_whisper_tensorrt.sh
+++ b/docker/scripts/build-whisper-tensorrt.sh
@@ -48,19 +48,21 @@ download_and_build_model() {
     # wget --directory-prefix=assets "$model_url"
     # echo "Download completed: ${model_name}.pt"
     if [ ! -f "assets/${model_name}.pt" ]; then
-        wget --directory-prefix=assets "$model_url"
+        wget --directory-prefix=assets "$model_url" > /dev/null 2>&1
         echo "Download completed: ${model_name}.pt"
     else
         echo "${model_name}.pt already exists in assets directory."
     fi
 
     local output_dir="whisper_${model_name//./_}"
-    echo "$output_dir"
-    echo "Running build script for $model_name with output directory $output_dir"
-    python3 build.py --output_dir "$output_dir" --use_gpt_attention_plugin --use_gemm_plugin --use_bert_attention_plugin --model_name "$model_name"
+    echo "Running TensorRT-LLM build script for $model_name with output directory $output_dir"
+    python3 build.py --output_dir "$output_dir" --use_gpt_attention_plugin --use_gemm_plugin --use_bert_attention_plugin --model_name "$model_name" > /dev/null 2>&1
     echo "Whisper $model_name TensorRT engine built."
     echo "========================================="
     echo "Model is located at: $(pwd)/$output_dir"
+    mkdir -p /root/scratch-space/models
+    cp -r "$output_dir" /root/scratch-space/models
+
 }
 
 if [ "$#" -lt 1 ]; then
@@ -70,8 +72,15 @@
 fi
 
 tensorrt_examples_dir="$1"
 model_name="${2:-small.en}"
+output_dir="whisper_${model_name//./_}"
 
-cd $1/whisper
-pip install --no-deps -r requirements.txt
-download_and_build_model "$model_name"
+if [ ! -d "/root/scratch-space/models/$output_dir" ] || [ -z "$(ls -A /root/scratch-space/models/$output_dir)" ]; then
+    echo "$output_dir directory does not exist or is empty. Building whisper"
+    cd "$1/whisper"
+    echo "Installing requirements for Whisper TensorRT-LLM ..."
+    pip install --no-deps -r requirements.txt > /dev/null 2>&1
+    download_and_build_model "$model_name"
+else
+    echo "$output_dir directory exists and is not empty. Skipping build-whisper..."
+fi
diff --git a/docker/scripts/install-deps.sh b/docker/scripts/install-deps.sh
new file mode 100755
index 00000000..8baad6aa
--- /dev/null
+++ b/docker/scripts/install-deps.sh
@@ -0,0 +1,53 @@
+#!/bin/bash -e
+
+apt-get update && apt-get -y install git git-lfs
+git clone --depth=1 -b cuda12.2 https://github.com/makaveli10/TensorRT-LLM.git
+cd TensorRT-LLM
+git submodule update --init --recursive
+git lfs install
+git lfs pull
+
+# do not reinstall CUDA (our base image provides the same exact versions)
+patch -p1 <