[TEST Framework] Add query http requests & openai sdk tests (#83)
* first commit query_http
* fix format
* fix proxy
* add matrix
* add openai test case
* change to github ci
* add
* github ci
* only gpt2
* fix openai
* f##
* change to github ci
* change to dare&docker
* change to docker
* ls
* test ls
* Organize code
* update openai
* fix openai
* fix ci
* fix openai
* add
* fix key
* remove checkoutpath
* fix checkout
* fix bash -c
* update req
* reduce req
* reduce code
* fix review229
* change os and req
* fix path
* docker python version
* change name
* after pr106 fix
* fix review
* fix lint
yutianchen666 authored Mar 11, 2024
1 parent c5c076a commit bad9cb9
Showing 9 changed files with 321 additions and 20 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/config/gpt2-ci.yaml
@@ -0,0 +1,21 @@
port: 8000
name: gpt2
route_prefix: /gpt2
cpus_per_worker: 2
gpus_per_worker: 0
deepspeed: false
workers_per_group: 2
device: CPU
ipex:
  enabled: true
  precision: bf16
model_description:
  model_id_or_path: gpt2
  tokenizer_name_or_path: gpt2
  chat_processor: ChatModelGptJ
  gpt_base_model: true
  prompt:
    intro: ''
    human_id: ''
    bot_id: ''
    stop_words: []
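The tests added in this commit launch the serving entry point with this config before issuing queries. A minimal sketch of that launch step, mirroring the subprocess call used in the test files below (paths relative to the repository root; this is an illustration, not part of the diff):

import subprocess

# Start the OpenAI-compatible server with the gpt2 CI config, as the new tests do.
result_serve = subprocess.run(
    ["llm_on_ray-serve", "--config_file", ".github/workflows/config/gpt2-ci.yaml"],
    capture_output=True,
    text=True,
)
assert "Error" not in result_serve.stderr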
99 changes: 90 additions & 9 deletions .github/workflows/workflow_tests.yml
@@ -1,13 +1,21 @@
 name: Tests
 
 on:
-  workflow_call
+  workflow_call:
+    inputs:
+      ci_type:
+        type: string
+        default: 'pr'
 
 jobs:
-  tests:
-    name: tests
-    runs-on: ubuntu-latest
+  bare-test:
+
+    name: bare-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash
@@ -19,17 +27,90 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: ${{matrix.python-version}}
           architecture: 'x64'
 
       - name: Display Python version
-        run: python -c "import sys; print(sys.version)"
+        run: |
+          python -c "import sys; print(sys.version)"
-      - name: Install dependencies
+      - name: Install dependencies for tests
         run: |
           python -m pip install --upgrade pip
           pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+          # Dynamic link oneCCL and Intel MPI libraries
+          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
+          # Additional libraries required for pytest
+          pip install -r ./tests/requirements.txt
+      - name: Start Ray Cluster
+        run: |
+          ray start --head
+      - name: Run Tests
+        run: |
+          ./tests/run-tests.sh
+  docker-test:
+
+    name: docker-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Determine Target
+        id: "target"
+        run: |
+          target="inference"
+          echo "target is ${target}"
+          echo "target=$target" >> $GITHUB_OUTPUT
+      - name: Build Docker Image
+        run: |
+          DF_SUFFIX=".tests_cpu_and_deepspeed"
+          TARGET=${{steps.target.outputs.target}}
+          docker build ./ --build-arg CACHEBUST=1 --build-arg python_v=${{matrix.python-version}} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes
+          docker image prune -f
-      - name: Start tests
+      - name: Start Docker Container
         run: |
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+          # check and remove exited container
+          cid=$(docker ps -a -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
+          docker ps -a
+          docker run -tid -v ${{ github.workspace }}:/root/llm-on-ray --name="${TARGET}" --hostname="${TARGET}-container" ${TARGET}:latest
+      - name: Install Dependencies for Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "pip install -r ./tests/requirements.txt"
+      - name: Start Ray Cluster
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./dev/scripts/start-ray-cluster.sh"
+      - name: Run Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./tests/run-tests.sh"
+      - name: Stop Container
+        if: success() || failure()
+        run: |
-          bash -c "./tests/run-tests.sh"
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
   - id: black
 
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: "v0.950"
+  rev: "v0.981"
   hooks:
   - id: mypy
     exclude: tests
43 changes: 43 additions & 0 deletions dev/docker/Dockerfile.tests_cpu_and_deepspeed
@@ -0,0 +1,43 @@
# syntax=docker/dockerfile:1
FROM ubuntu:22.04

ARG python_v

ENV LANG C.UTF-8

WORKDIR /root/llm-on-ray

RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
&& apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ENV CONDA_DIR /opt/conda
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# setup env
SHELL ["/bin/bash", "--login", "-c"]

RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
unset -f conda && \
export PATH=$CONDA_DIR/bin/:${PATH} && \
conda config --add channels intel && \
conda install python==${python_v}

COPY ./pyproject.toml .
COPY ./MANIFEST.in .

# create llm_on_ray package directory to bypass the following 'pip install -e' command
RUN mkdir ./llm_on_ray

RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] --extra-index-url https://download.pytorch.org/whl/cpu \
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/

RUN ds_report

# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
ARG CACHEBUST=1
COPY ./dev/scripts/install-oneapi.sh /tmp
RUN /tmp/install-oneapi.sh
73 changes: 73 additions & 0 deletions tests/inference/test_example_query_http_requests.py
@@ -0,0 +1,73 @@
import subprocess
import pytest
import os


def script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p):
    current_path = os.path.dirname(os.path.abspath(__file__))

    config_path = os.path.join(
        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
    )

    os.path.join(current_path, "../../inference/serve.py")

    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]

    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)

    # Print the output of subprocess.run for checking if output is expected
    print(result_serve)

    # Ensure there are no errors in the serve script execution
    assert "Error" not in result_serve.stderr

    example_http_path = os.path.join(
        current_path, "../../examples/inference/api_server_openai/query_http_requests.py"
    )

    cmd_http = [
        "python",
        example_http_path,
        "--model_name",
        model_name,
    ]

    if streaming_response:
        cmd_http.append("--streaming_response")

    if max_new_tokens is not None:
        cmd_http.extend(["--max_new_tokens", str(max_new_tokens)])

    if temperature is not None:
        cmd_http.extend(["--temperature", str(temperature)])

    if top_p is not None:
        cmd_http.extend(["--top_p", str(top_p)])

    result_http = subprocess.run(cmd_http, capture_output=True, text=True)

    # Print the output of subprocess.run for checking if output is expected
    print(result_http)

    # Ensure there are no errors in the http query script execution
    assert "Error" not in result_http.stderr

    assert isinstance(result_http.stdout, str)

    assert len(result_http.stdout) > 0


@pytest.mark.parametrize(
    "model_name,streaming_response,max_new_tokens,temperature,top_p",
    [
        (model_name, streaming_response, max_new_tokens, temperature, top_p)
        for model_name in ["gpt2"]
        for streaming_response in [False, True]
        for max_new_tokens in [None, 128]
        for temperature in [None, 0.8]
        for top_p in [None, 0.7]
    ],
)
def test_script(model_name, streaming_response, max_new_tokens, temperature, top_p):
    script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p)
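The query_http_requests.py example driven by this test is not part of the diff. As a rough sketch, a query against the served gpt2 model could look like the following, assuming the example targets the OpenAI-compatible chat completions route exposed on port 8000 (the endpoint path and payload shape here are assumptions, not taken from the example script):

import requests

# Hypothetical request shape; the real example script builds its query from CLI flags
# such as --model_name, --max_new_tokens, --temperature, --top_p, --streaming_response.
response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "gpt2",
        "messages": [{"role": "user", "content": "Tell me a story."}],
        "max_tokens": 128,
        "temperature": 0.8,
        "top_p": 0.7,
        "stream": False,
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])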
85 changes: 85 additions & 0 deletions tests/inference/test_example_query_openai_sdk.py
@@ -0,0 +1,85 @@
import subprocess
import pytest
import os

os.environ["no_proxy"] = "localhost,127.0.0.1"
os.environ["OPENAI_API_BASE"] = "http://localhost:8000/v1"
os.environ["OPENAI_API_KEY"] = "YOUR_OPEN_AI_KEY"
os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"


def script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
    # Other OpenAI SDK tests
    if api_base != "http://localhost:8000/v1":
        os.environ["OPENAI_API_BASE"] = api_base
        os.environ["OPENAI_BASE_URL"] = api_base

    current_path = os.path.dirname(os.path.abspath(__file__))

    config_path = os.path.join(
        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
    )

    os.path.join(current_path, "../../inference/serve.py")

    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]

    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)

    # Print the output of subprocess.run for checking if output is expected
    print(result_serve)

    # Ensure there are no errors in the serve script execution
    assert "Error" not in result_serve.stderr

    example_openai_path = os.path.join(
        current_path, "../../examples/inference/api_server_openai/query_openai_sdk.py"
    )

    cmd_openai = [
        "python",
        example_openai_path,
        "--model_name",
        model_name,
    ]

    if streaming_response:
        cmd_openai.append("--streaming_response")

    if max_new_tokens is not None:
        cmd_openai.extend(["--max_new_tokens", str(max_new_tokens)])

    if temperature is not None:
        cmd_openai.extend(["--temperature", str(temperature)])

    if top_p is not None:
        cmd_openai.extend(["--top_p", str(top_p)])

    result_openai = subprocess.run(cmd_openai, capture_output=True, text=True)

    # Print the output of subprocess.run for checking if output is expected
    print(result_openai)

    # Ensure there are no errors in the OpenAI API query script execution
    assert "Error" not in result_openai.stderr

    assert isinstance(result_openai.stdout, str)

    assert len(result_openai.stdout) > 0


# Parametrize the test function with different combinations of parameters
@pytest.mark.parametrize(
    "api_base,model_name,streaming_response,max_new_tokens,temperature,top_p",
    [
        (api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
        for api_base in ["http://localhost:8000/v1"]
        for model_name in ["gpt2"]
        for streaming_response in [False, True]
        for max_new_tokens in [None, 128]
        for temperature in [None, 0.8]
        for top_p in [None, 0.7]
    ],
)
def test_script(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
    script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
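The companion query_openai_sdk.py example exercised here also sits outside this diff. A minimal sketch of an equivalent call with the openai package (pulled in via tests/requirements.txt), assuming the v1-style client API and the endpoint and key values exported at the top of this test:

from openai import OpenAI

# Base URL and key match the environment variables set in this test;
# the served model name comes from gpt2-ci.yaml.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="YOUR_OPEN_AI_KEY")
completion = client.chat.completions.create(
    model="gpt2",
    messages=[{"role": "user", "content": "Tell me a story."}],
    max_tokens=128,
    temperature=0.8,
    top_p=0.7,
)
print(completion.choices[0].message.content)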
4 changes: 2 additions & 2 deletions tests/inference/test_utils.py
@@ -1,14 +1,14 @@
 import pytest
 import torch
 
-from inference.utils import (
+from llm_on_ray.inference.utils import (
     get_deployment_actor_options,
     StoppingCriteriaSub,
     max_input_len,
     get_torch_dtype,
     is_cpu_without_ipex,
 )
-from inference_config import InferenceConfig, DEVICE_CPU
+from llm_on_ray.inference.inference_config import InferenceConfig, DEVICE_CPU
 
 
 # Mock the InferenceConfig for testing
10 changes: 3 additions & 7 deletions tests/requirements.txt
@@ -1,7 +1,3 @@
-pytest==7.4.4
-torch==2.1.0
-transformers==4.36.0
-starlette==0.36.2
-pydantic==1.10.13
-pydantic-yaml==1.2.0
-pydantic_core==2.14.5
+pytest
+openai
+async-timeout
4 changes: 3 additions & 1 deletion tests/run-tests.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
 set -eo pipefail
 cd $(dirname $0)
 
+
 # Run pytest with the test file
-pytest -vs ./inference
+pytest -vv --capture=tee-sys --show-capture=all ./inference
 
+echo "Pytest finished running tests."
