diff --git a/.github/workflows/config/gpt2-ci.yaml b/.github/workflows/config/gpt2-ci.yaml
new file mode 100644
index 000000000..57913874e
--- /dev/null
+++ b/.github/workflows/config/gpt2-ci.yaml
@@ -0,0 +1,21 @@
+port: 8000
+name: gpt2
+route_prefix: /gpt2
+cpus_per_worker: 2
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: CPU
+ipex:
+  enabled: true
+  precision: bf16
+model_description:
+  model_id_or_path: gpt2
+  tokenizer_name_or_path: gpt2
+  chat_processor: ChatModelGptJ
+  gpt_base_model: true
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: []
diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index 19f398952..63a5fd80b 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -1,13 +1,21 @@
 name: Tests
 
 on:
-  workflow_call
+  workflow_call:
+    inputs:
+      ci_type:
+        type: string
+        default: 'pr'
 
 jobs:
-  tests:
-    name: tests
-    runs-on: ubuntu-latest
+  bare-test:
+
+    name: bare-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+    runs-on: ubuntu-latest
 
     defaults:
       run:
         shell: bash
@@ -19,17 +27,90 @@ jobs:
       - name: Set up Python
        uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: ${{matrix.python-version}}
           architecture: 'x64'
 
       - name: Display Python version
-        run: python -c "import sys; print(sys.version)"
+        run: |
+          python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies
+      - name: Install dependencies for tests
         run: |
           python -m pip install --upgrade pip
+          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+          # Dynamic link oneCCL and Intel MPI libraries
+          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
+          # Additional libraries required for pytest
           pip install -r ./tests/requirements.txt
+
+      - name: Start Ray Cluster
+        run: |
+          ray start --head
+
+      - name: Run Tests
+        run: |
+          ./tests/run-tests.sh
+
+  docker-test:
+
+    name: docker-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Determine Target
+        id: "target"
+        run: |
+          target="inference"
+          echo "target is ${target}"
+          echo "target=$target" >> $GITHUB_OUTPUT
+
+      - name: Build Docker Image
+        run: |
+          DF_SUFFIX=".tests_cpu_and_deepspeed"
+          TARGET=${{steps.target.outputs.target}}
+          docker build ./ --build-arg CACHEBUST=1 --build-arg python_v=${{matrix.python-version}} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes |
+          docker image prune -f
 
-      - name: Start tests
+      - name: Start Docker Container
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+          # check and remove exited container
+          cid=$(docker ps -a -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
+          docker ps -a
+          docker run -tid -v ${{ github.workspace }}:/root/llm-on-ray --name="${TARGET}" --hostname="${TARGET}-container" ${TARGET}:latest
+
+      - name: Install Dependencies for Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "pip install -r ./tests/requirements.txt"
+
+      - name: Start Ray Cluster
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./dev/scripts/start-ray-cluster.sh"
+
+      - name: Run Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./tests/run-tests.sh"
+
+      - name: Stop Container
+        if: success() || failure()
         run: |
-          bash -c "./tests/run-tests.sh"
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c539326c1..5e56e52a6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
       - id: black
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: "v0.950"
+    rev: "v0.981"
     hooks:
       - id: mypy
         exclude: tests
diff --git a/dev/docker/Dockerfile.tests_cpu_and_deepspeed b/dev/docker/Dockerfile.tests_cpu_and_deepspeed
new file mode 100644
index 000000000..4d159225d
--- /dev/null
+++ b/dev/docker/Dockerfile.tests_cpu_and_deepspeed
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1
+FROM ubuntu:22.04
+
+ARG python_v
+
+ENV LANG C.UTF-8
+
+WORKDIR /root/llm-on-ray
+
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
+    && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV CONDA_DIR /opt/conda
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda
+ENV PATH $CONDA_DIR/bin:$PATH
+
+# setup env
+SHELL ["/bin/bash", "--login", "-c"]
+
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
+    unset -f conda && \
+    export PATH=$CONDA_DIR/bin/:${PATH} && \
+    conda config --add channels intel && \
+    conda install python==${python_v}
+
+COPY ./pyproject.toml .
+COPY ./MANIFEST.in .
+
+# create llm_on_ray package directory to bypass the following 'pip install -e' command
+RUN mkdir ./llm_on_ray
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] --extra-index-url https://download.pytorch.org/whl/cpu \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+
+RUN ds_report
+
+# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
+ARG CACHEBUST=1
+COPY ./dev/scripts/install-oneapi.sh /tmp
+RUN /tmp/install-oneapi.sh
diff --git a/tests/inference/test_example_query_http_requests.py b/tests/inference/test_example_query_http_requests.py
new file mode 100644
index 000000000..d8d5a168c
--- /dev/null
+++ b/tests/inference/test_example_query_http_requests.py
@@ -0,0 +1,73 @@
+import subprocess
+import pytest
+import os
+
+
+def script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p):
+    current_path = os.path.dirname(os.path.abspath(__file__))
+
+    config_path = os.path.join(
+        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
+    )
+
+    os.path.join(current_path, "../../inference/serve.py")
+
+    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]
+
+    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_serve)
+
+    # Ensure there are no errors in the serve script execution
+    assert "Error" not in result_serve.stderr
+
+    example_http_path = os.path.join(
+        current_path, "../../examples/inference/api_server_openai/query_http_requests.py"
+    )
+
+    cmd_http = [
+        "python",
+        example_http_path,
+        "--model_name",
+        model_name,
+    ]
+
+    if streaming_response:
+        cmd_http.append("--streaming_response")
+
+    if max_new_tokens is not None:
+        cmd_http.extend(["--max_new_tokens", str(max_new_tokens)])
+
+    if temperature is not None:
+        cmd_http.extend(["--temperature", str(temperature)])
+
+    if top_p is not None:
+        cmd_http.extend(["--top_p", str(top_p)])
+
+    result_http = subprocess.run(cmd_http, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_http)
+
+    # Ensure there are no errors in the http query script execution
+    assert "Error" not in result_http.stderr
+
+    assert isinstance(result_http.stdout, str)
+
+    assert len(result_http.stdout) > 0
+
+
+@pytest.mark.parametrize(
+    "model_name,streaming_response,max_new_tokens,temperature,top_p",
+    [
+        (model_name, streaming_response, max_new_tokens, temperature, top_p)
+        for model_name in ["gpt2"]
+        for streaming_response in [False, True]
+        for max_new_tokens in [None, 128]
+        for temperature in [None, 0.8]
+        for top_p in [None, 0.7]
+    ],
+)
+def test_script(model_name, streaming_response, max_new_tokens, temperature, top_p):
+    script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p)
diff --git a/tests/inference/test_example_query_openai_sdk.py b/tests/inference/test_example_query_openai_sdk.py
new file mode 100644
index 000000000..24dc50e31
--- /dev/null
+++ b/tests/inference/test_example_query_openai_sdk.py
@@ -0,0 +1,85 @@
+import subprocess
+import pytest
+import os
+
+os.environ["no_proxy"] = "localhost,127.0.0.1"
+os.environ["OPENAI_API_BASE"] = "http://localhost:8000/v1"
+os.environ["OPENAI_API_KEY"] = "YOUR_OPEN_AI_KEY"
+os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"
+
+
+def script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
+    # Other OpenAI SDK tests
+    if api_base != "http://localhost:8000/v1":
+        os.environ["OPENAI_API_BASE"] = api_base
+        os.environ["OPENAI_BASE_URL"] = api_base
+
+    current_path = os.path.dirname(os.path.abspath(__file__))
+
+    config_path = os.path.join(
+        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
+    )
+
+    os.path.join(current_path, "../../inference/serve.py")
+
+    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]
+
+    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_serve)
+
+    # Ensure there are no errors in the serve script execution
+    assert "Error" not in result_serve.stderr
+
+    example_openai_path = os.path.join(
+        current_path, "../../examples/inference/api_server_openai/query_openai_sdk.py"
+    )
+
+    cmd_openai = [
+        "python",
+        example_openai_path,
+        "--model_name",
+        model_name,
+    ]
+
+    if streaming_response:
+        cmd_openai.append("--streaming_response")
+
+    if max_new_tokens is not None:
+        cmd_openai.extend(["--max_new_tokens", str(max_new_tokens)])
+
+    if temperature is not None:
+        cmd_openai.extend(["--temperature", str(temperature)])
+
+    if top_p is not None:
+        cmd_openai.extend(["--top_p", str(top_p)])
+
+    result_openai = subprocess.run(cmd_openai, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_openai)
+
+    # Ensure there are no errors in the OpenAI API query script execution
+    assert "Error" not in result_openai.stderr
+
+    assert isinstance(result_openai.stdout, str)
+
+    assert len(result_openai.stdout) > 0
+
+
+# Parametrize the test function with different combinations of parameters
+@pytest.mark.parametrize(
+    "api_base,model_name,streaming_response,max_new_tokens,temperature,top_p",
+    [
+        (api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
+        for api_base in ["http://localhost:8000/v1"]
+        for model_name in ["gpt2"]
+        for streaming_response in [False, True]
+        for max_new_tokens in [None, 128]
+        for temperature in [None, 0.8]
+        for top_p in [None, 0.7]
+    ],
+)
+def test_script(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
+    script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
diff --git a/tests/inference/test_utils.py b/tests/inference/test_utils.py
index 37b16d677..d2a996b62 100644
--- a/tests/inference/test_utils.py
+++ b/tests/inference/test_utils.py
@@ -1,14 +1,14 @@
 import pytest
 import torch
 
-from inference.utils import (
+from llm_on_ray.inference.utils import (
     get_deployment_actor_options,
     StoppingCriteriaSub,
     max_input_len,
     get_torch_dtype,
     is_cpu_without_ipex,
 )
 
-from inference_config import InferenceConfig, DEVICE_CPU
+from llm_on_ray.inference.inference_config import InferenceConfig, DEVICE_CPU
 
 # Mock the InferenceConfig for testing
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 9694dd725..cf6c10e5f 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,7 +1,3 @@
-pytest==7.4.4
-torch==2.1.0
-transformers==4.36.0
-starlette==0.36.2
-pydantic==1.10.13
-pydantic-yaml==1.2.0
-pydantic_core==2.14.5
\ No newline at end of file
+pytest
+openai
+async-timeout
\ No newline at end of file
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 19cbb53aa..2b10723e5 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
+set -eo pipefail
 
 cd $(dirname $0)
+
 # Run pytest with the test file
-pytest -vs ./inference
+pytest -vv --capture=tee-sys --show-capture=all ./inference
 
 echo "Pytest finished running tests."