diff --git a/.github/workflows/config/gpt2-ci.yaml b/.github/workflows/config/gpt2-ci.yaml
new file mode 100644
index 000000000..57913874e
--- /dev/null
+++ b/.github/workflows/config/gpt2-ci.yaml
@@ -0,0 +1,21 @@
+port: 8000
+name: gpt2
+route_prefix: /gpt2
+cpus_per_worker: 2
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: CPU
+ipex:
+  enabled: true
+  precision: bf16
+model_description:
+  model_id_or_path: gpt2
+  tokenizer_name_or_path: gpt2
+  chat_processor: ChatModelGptJ
+  gpt_base_model: true
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: []
diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index 19f398952..63a5fd80b 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -1,13 +1,21 @@
 name: Tests
 
 on:
-  workflow_call
+  workflow_call:
+    inputs:
+      ci_type:
+        type: string
+        default: 'pr'
 
 jobs:
-  tests:
-    name: tests
-    runs-on: ubuntu-latest
+  bare-test:
+
+    name: bare-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+    runs-on: ubuntu-latest
 
     defaults:
       run:
         shell: bash
@@ -19,17 +27,90 @@ jobs:
       - name: Set up Python
        uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: ${{matrix.python-version}}
           architecture: 'x64'
 
       - name: Display Python version
-        run: python -c "import sys; print(sys.version)"
+        run: |
+          python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies
+      - name: Install dependencies for tests
         run: |
           python -m pip install --upgrade pip
+          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+          # Dynamic link oneCCL and Intel MPI libraries
+          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
+          # Additional libraries required for pytest
           pip install -r ./tests/requirements.txt
+
+      - name: Start Ray Cluster
+        run: |
+          ray start --head
+
+      - name: Run Tests
+        run: |
+          ./tests/run-tests.sh
+
+  docker-test:
+
+    name: docker-test
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Determine Target
+        id: "target"
+        run: |
+          target="inference"
+          echo "target is ${target}"
+          echo "target=$target" >> $GITHUB_OUTPUT
+
+      - name: Build Docker Image
+        run: |
+          DF_SUFFIX=".tests_cpu_and_deepspeed"
+          TARGET=${{steps.target.outputs.target}}
+          docker build ./ --build-arg CACHEBUST=1 --build-arg python_v=${{matrix.python-version}} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes |
+          docker image prune -f
 
-      - name: Start tests
+      - name: Start Docker Container
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+          # check and remove exited container
+          cid=$(docker ps -a -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
+          docker ps -a
+          docker run -tid -v ${{ github.workspace }}:/root/llm-on-ray --name="${TARGET}" --hostname="${TARGET}-container" ${TARGET}:latest
+
+      - name: Install Dependencies for Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "pip install -r ./tests/requirements.txt"
+
+      - name: Start Ray Cluster
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./dev/scripts/start-ray-cluster.sh"
+
+      - name: Run Tests
+        run: |
+          TARGET=${{steps.target.outputs.target}}
+          docker exec "${TARGET}" bash -c "./tests/run-tests.sh"
+
+      - name: Stop Container
+        if: success() || failure()
         run: |
-          bash -c "./tests/run-tests.sh"
+          TARGET=${{steps.target.outputs.target}}
+          cid=$(docker ps -q --filter "name=${TARGET}")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c539326c1..5e56e52a6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
       - id: black
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: "v0.950"
+    rev: "v0.981"
     hooks:
       - id: mypy
         exclude: tests
diff --git a/dev/docker/Dockerfile.tests_cpu_and_deepspeed b/dev/docker/Dockerfile.tests_cpu_and_deepspeed
new file mode 100644
index 000000000..4d159225d
--- /dev/null
+++ b/dev/docker/Dockerfile.tests_cpu_and_deepspeed
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1
+FROM ubuntu:22.04
+
+ARG python_v
+
+ENV LANG C.UTF-8
+
+WORKDIR /root/llm-on-ray
+
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
+    && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV CONDA_DIR /opt/conda
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda
+ENV PATH $CONDA_DIR/bin:$PATH
+
+# setup env
+SHELL ["/bin/bash", "--login", "-c"]
+
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
+    unset -f conda && \
+    export PATH=$CONDA_DIR/bin/:${PATH} && \
+    conda config --add channels intel && \
+    conda install python==${python_v}
+
+COPY ./pyproject.toml .
+COPY ./MANIFEST.in .
+
+# create llm_on_ray package directory to bypass the following 'pip install -e' command
+RUN mkdir ./llm_on_ray
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] --extra-index-url https://download.pytorch.org/whl/cpu \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+
+RUN ds_report
+
+# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
+ARG CACHEBUST=1
+COPY ./dev/scripts/install-oneapi.sh /tmp
+RUN /tmp/install-oneapi.sh
diff --git a/tests/inference/test_example_query_http_requests.py b/tests/inference/test_example_query_http_requests.py
new file mode 100644
index 000000000..d8d5a168c
--- /dev/null
+++ b/tests/inference/test_example_query_http_requests.py
@@ -0,0 +1,73 @@
+import subprocess
+import pytest
+import os
+
+
+def script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p):
+    current_path = os.path.dirname(os.path.abspath(__file__))
+
+    config_path = os.path.join(
+        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
+    )
+
+    os.path.join(current_path, "../../inference/serve.py")
+
+    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]
+
+    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_serve)
+
+    # Ensure there are no errors in the serve script execution
+    assert "Error" not in result_serve.stderr
+
+    example_http_path = os.path.join(
+        current_path, "../../examples/inference/api_server_openai/query_http_requests.py"
+    )
+
+    cmd_http = [
+        "python",
+        example_http_path,
+        "--model_name",
+        model_name,
+    ]
+
+    if streaming_response:
+        cmd_http.append("--streaming_response")
+
+    if max_new_tokens is not None:
+        cmd_http.extend(["--max_new_tokens", str(max_new_tokens)])
+
+    if temperature is not None:
+        cmd_http.extend(["--temperature", str(temperature)])
+
+    if top_p is not None:
+        cmd_http.extend(["--top_p", str(top_p)])
+
+    result_http = subprocess.run(cmd_http, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_http)
+
+    # Ensure there are no errors in the http query script execution
+    assert "Error" not in result_http.stderr
+
+    assert isinstance(result_http.stdout, str)
+
+    assert len(result_http.stdout) > 0
+
+
+@pytest.mark.parametrize(
+    "model_name,streaming_response,max_new_tokens,temperature,top_p",
+    [
+        (model_name, streaming_response, max_new_tokens, temperature, top_p)
+        for model_name in ["gpt2"]
+        for streaming_response in [False, True]
+        for max_new_tokens in [None, 128]
+        for temperature in [None, 0.8]
+        for top_p in [None, 0.7]
+    ],
+)
+def test_script(model_name, streaming_response, max_new_tokens, temperature, top_p):
+    script_with_args(model_name, streaming_response, max_new_tokens, temperature, top_p)
diff --git a/tests/inference/test_example_query_openai_sdk.py b/tests/inference/test_example_query_openai_sdk.py
new file mode 100644
index 000000000..24dc50e31
--- /dev/null
+++ b/tests/inference/test_example_query_openai_sdk.py
@@ -0,0 +1,85 @@
+import subprocess
+import pytest
+import os
+
+os.environ["no_proxy"] = "localhost,127.0.0.1"
+os.environ["OPENAI_API_BASE"] = "http://localhost:8000/v1"
+os.environ["OPENAI_API_KEY"] = "YOUR_OPEN_AI_KEY"
+os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"
+
+
+def script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
+    # Other OpenAI SDK tests
+    if api_base != "http://localhost:8000/v1":
+        os.environ["OPENAI_API_BASE"] = api_base
+        os.environ["OPENAI_BASE_URL"] = api_base
+
+    current_path = os.path.dirname(os.path.abspath(__file__))
+
+    config_path = os.path.join(
+        current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml"
+    )
+
+    os.path.join(current_path, "../../inference/serve.py")
+
+    cmd_serve = ["llm_on_ray-serve", "--config_file", config_path]
+
+    result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_serve)
+
+    # Ensure there are no errors in the serve script execution
+    assert "Error" not in result_serve.stderr
+
+    example_openai_path = os.path.join(
+        current_path, "../../examples/inference/api_server_openai/query_openai_sdk.py"
+    )
+
+    cmd_openai = [
+        "python",
+        example_openai_path,
+        "--model_name",
+        model_name,
+    ]
+
+    if streaming_response:
+        cmd_openai.append("--streaming_response")
+
+    if max_new_tokens is not None:
+        cmd_openai.extend(["--max_new_tokens", str(max_new_tokens)])
+
+    if temperature is not None:
+        cmd_openai.extend(["--temperature", str(temperature)])
+
+    if top_p is not None:
+        cmd_openai.extend(["--top_p", str(top_p)])
+
+    result_openai = subprocess.run(cmd_openai, capture_output=True, text=True)
+
+    # Print the output of subprocess.run for checking if output is expected
+    print(result_openai)
+
+    # Ensure there are no errors in the OpenAI API query script execution
+    assert "Error" not in result_openai.stderr
+
+    assert isinstance(result_openai.stdout, str)
+
+    assert len(result_openai.stdout) > 0
+
+
+# Parametrize the test function with different combinations of parameters
+@pytest.mark.parametrize(
+    "api_base,model_name,streaming_response,max_new_tokens,temperature,top_p",
+    [
+        (api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
+        for api_base in ["http://localhost:8000/v1"]
+        for model_name in ["gpt2"]
+        for streaming_response in [False, True]
+        for max_new_tokens in [None, 128]
+        for temperature in [None, 0.8]
+        for top_p in [None, 0.7]
+    ],
+)
+def test_script(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p):
+    script_with_args(api_base, model_name, streaming_response, max_new_tokens, temperature, top_p)
diff --git a/tests/inference/test_utils.py b/tests/inference/test_utils.py
index 37b16d677..d2a996b62 100644
--- a/tests/inference/test_utils.py
+++ b/tests/inference/test_utils.py
@@ -1,14 +1,14 @@
 import pytest
 import torch
 
-from inference.utils import (
+from llm_on_ray.inference.utils import (
     get_deployment_actor_options,
     StoppingCriteriaSub,
     max_input_len,
     get_torch_dtype,
     is_cpu_without_ipex,
 )
 
-from inference_config import InferenceConfig, DEVICE_CPU
+from llm_on_ray.inference.inference_config import InferenceConfig, DEVICE_CPU
 
 # Mock the InferenceConfig for testing
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 9694dd725..cf6c10e5f 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,7 +1,3 @@
-pytest==7.4.4
-torch==2.1.0
-transformers==4.36.0
-starlette==0.36.2
-pydantic==1.10.13
-pydantic-yaml==1.2.0
-pydantic_core==2.14.5
\ No newline at end of file
+pytest
+openai
+async-timeout
\ No newline at end of file
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 19cbb53aa..2b10723e5 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
+set -eo pipefail
 
 cd $(dirname $0)
+
 # Run pytest with the test file
-pytest -vs ./inference
+pytest -vv --capture=tee-sys --show-capture=all ./inference
 
 echo "Pytest finished running tests."