Integrate vllm/ns into llm-on-ray #267

Open: wants to merge 86 commits into base: main
Commits (86), showing changes from all commits
d2d1f20
add benchmark run script, visualize script
KepingYan Apr 17, 2024
88cc01e
upd
KepingYan Apr 26, 2024
083ae60
update multi replicas
KepingYan May 7, 2024
4c6fa74
use --result-dir to parse results
KepingYan May 8, 2024
1b3b13a
fix ci proxy
KepingYan May 8, 2024
184e00e
add test ci
KepingYan May 9, 2024
bd85b7d
add license
KepingYan May 9, 2024
38c52ed
fix
KepingYan May 9, 2024
78dc091
fix
KepingYan May 9, 2024
7cc0de0
add autoscaling config
KepingYan May 10, 2024
e241b25
fix ci
KepingYan May 10, 2024
3eb1c08
fix ci
KepingYan May 10, 2024
882ff4d
add package matplotlib
KepingYan May 10, 2024
21994cd
verify CI test
KepingYan May 10, 2024
d688804
verify CI test
KepingYan May 11, 2024
c8eabbc
create assets folder to place pictures
KepingYan May 13, 2024
3905082
verify CI test
KepingYan May 13, 2024
97ec06a
support openai autoscaling
KepingYan May 13, 2024
606f286
remove
KepingYan May 13, 2024
55c1dd1
integrate vllm and ns
jiafuzha May 16, 2024
e709010
update config file
KepingYan May 17, 2024
5b1bd85
integrate vllm and ns
jiafuzha May 17, 2024
eb71ace
integrate vllm and ns
jiafuzha May 17, 2024
a969f7f
remove .eggs
jiafuzha May 17, 2024
1b6aba3
integration adjustment
jiafuzha May 17, 2024
ce3ac61
llm on ray deployed
jiafuzha May 20, 2024
213ad89
llm on ray deployed
jiafuzha May 20, 2024
9b4884f
llm on ray deployed
jiafuzha May 21, 2024
3cb6f64
more doc
jiafuzha May 21, 2024
3f9ba62
merge with master
jiafuzha May 21, 2024
f6d60be
more doc for installing vllm ext
jiafuzha May 21, 2024
04cddcf
Merge remote-tracking branch 'keping/test_benchmark_script' into vllm…
jiafuzha May 21, 2024
d0d40dd
Merge remote-tracking branch 'keping/autoscaling_config' into vllm-ns…
jiafuzha May 21, 2024
24cc480
bug fix
jiafuzha May 24, 2024
295186e
save
jiafuzha May 27, 2024
875aa89
add vllm-ext/requirements.txt
jiafuzha May 27, 2024
2a462ea
add CMakeLists.txt
jiafuzha May 27, 2024
a105321
changed benchmarks
jiafuzha May 27, 2024
6aa0540
tuned graph build
jiafuzha May 30, 2024
7d6d3b4
graph build time reduced
jiafuzha May 31, 2024
473671e
graph build time reduced
jiafuzha May 31, 2024
1a88edd
configurable perf stats and copy quant config automatically
jiafuzha Jun 4, 2024
dfd26b0
save test script
jiafuzha Jun 5, 2024
65c816f
add max_batched_tokens parameter
jiafuzha Jun 6, 2024
89936d3
adjustment and ray-vllm-examples
jiafuzha Jun 12, 2024
4f088e2
perf tuned and improved by disable mmap for multiple instances
jiafuzha Jun 17, 2024
597d83d
remove unnecessary thread sync in kernels
jiafuzha Jun 19, 2024
e06e53b
merged ns PR 209 7d49516
jiafuzha Jun 24, 2024
b093d3f
change order of loop, batch size first, then iteration
jiafuzha Jun 25, 2024
423fc10
modified some examples
jiafuzha Jun 26, 2024
96657ce
add more parameters for vllm-ns test
JoshuaL3000 Jun 26, 2024
2336966
Merge remote-tracking branch 'refs/remotes/origin/vllm-ns-merged-209-…
jiafuzha Jun 26, 2024
f1d06d9
add more parameters for vllm-ns test
JoshuaL3000 Jun 26, 2024
34664ed
add more parameters for vllm-ns test
JoshuaL3000 Jun 26, 2024
4782617
merged with master
jiafuzha Jun 27, 2024
04b7582
prevent quantization being messed-up with multiple processes
jiafuzha Jun 27, 2024
b791a1d
fix merge error
jiafuzha Jun 27, 2024
79e5daf
rename py to sh
jiafuzha Jun 27, 2024
2c9b287
fix formatting issue
jiafuzha Jun 27, 2024
5ac7907
fix formatting issue
jiafuzha Jun 27, 2024
19fc069
fix merge error
JoshuaL3000 Jun 27, 2024
76fe811
Merge remote-tracking branch 'refs/remotes/origin/vllm-ns-perf-test' …
jiafuzha Jun 27, 2024
5760c65
add vllm-ns ci
jiafuzha Jun 28, 2024
30efd3f
remove unnecessary logs
jiafuzha Jun 28, 2024
1d9b4e3
remove some debug code
jiafuzha Jun 28, 2024
a14a146
add '--privileged' to docker run
jiafuzha Jun 28, 2024
4f59cb8
set unlimited max lock memory for neural speed engine
jiafuzha Jun 28, 2024
4df4f85
merged with master
jiafuzha Jun 28, 2024
e781d0b
llama-3-8B support
jiafuzha Jul 2, 2024
af7730a
extend token length limit to 8192 for mha
jiafuzha Jul 5, 2024
a92f019
extend token length limit to 8192 for mha
jiafuzha Jul 5, 2024
77ee207
extend token length limit to 8192 for mha (fix) and support different…
jiafuzha Jul 5, 2024
5154887
extend token length limit to 8192 for mha (fix) and support different…
jiafuzha Jul 5, 2024
f8e51a2
add llama3 for plain cpu
jiafuzha Jul 9, 2024
4ab3b0a
benchmark idc simple/medium/complex/verycomplex prompts
jiafuzha Jul 11, 2024
5476705
benchmark idc simple/medium/complex/verycomplex prompts
jiafuzha Jul 12, 2024
ea02ef3
benchmark idc simple/medium/complex/verycomplex prompts
jiafuzha Jul 12, 2024
7952602
add inference_engine resource and app_router resource to distinct eng…
jiafuzha Jul 16, 2024
c5c6a12
Merge remote-tracking branch 'refs/remotes/origin/vllm-ns-merged-209-…
jiafuzha Jul 16, 2024
58ad614
enhanced benchmark script to support IDC test data
jiafuzha Jul 16, 2024
724eced
updated ray startup script to add resources for app_router and infere…
jiafuzha Jul 16, 2024
7a4d7fd
fix first token latency and next token latency issue in open-ai mode …
jiafuzha Jul 16, 2024
5a59427
updated ray startup script to add resources for app_router and infere…
jiafuzha Jul 16, 2024
d5694c2
addressed some review comments
jiafuzha Jul 16, 2024
10f0f7c
fix lint issue
jiafuzha Jul 16, 2024
52c3451
address review comment by getting number of threads from ray num-cpus…
jiafuzha Jul 17, 2024
Files changed
1 change: 1 addition & 0 deletions .github/license/header_exclude_files.txt
@@ -0,0 +1 @@
+vllm-ext/vllm/extension/ns/__init__.py
8 changes: 6 additions & 2 deletions .github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-ipex-llm, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b, deepseek-coder-33b-instruct]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-ipex-llm, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, llama-2-7b-chat-hf-vllm-ns, gemma-2b, deepseek-coder-33b-instruct]
       isPR:
         - ${{inputs.ci_type == 'pr'}}
@@ -97,7 +97,11 @@ jobs:
         run: |
           TARGET=${{steps.target.outputs.target}}
           source dev/scripts/ci-functions.sh
-          strat_ray ${TARGET}
+          if [[ "$TARGET" == *ns ]]; then
+            start_ray ${TARGET} 1
+          else
+            start_ray ${TARGET}
+          fi

      - name: Run Inference Test
        run: |
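The branch above relies on a plain bash glob match against the CI target name: only targets ending in "ns" (such as the newly added llama-2-7b-chat-hf-vllm-ns matrix entry) pass the extra argument to start_ray. A minimal standalone sketch of that test is below; note that the meaning of the extra `1` is defined in dev/scripts/ci-functions.sh, which is not part of this diff.

```bash
# Standalone sketch of the glob test used in the workflow step above.
# Targets ending in "ns" take the vllm-ns branch; the effect of the extra "1"
# argument to start_ray is defined in dev/scripts/ci-functions.sh (not shown here).
for TARGET in llama-2-7b-chat-hf-vllm llama-2-7b-chat-hf-vllm-ns; do
  if [[ "$TARGET" == *ns ]]; then
    echo "$TARGET -> start_ray \$TARGET 1"
  else
    echo "$TARGET -> start_ray \$TARGET"
  fi
done
```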
2 changes: 1 addition & 1 deletion .github/workflows/workflow_inference_gaudi2.yml
@@ -94,7 +94,7 @@ jobs:
           # check and remove exited container
           cid=$(docker ps -a -q --filter "name=${TARGET}")
           if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-          docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+          docker run -tid --privileged --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
       - name: Start Ray Cluster
         run: |
           TARGET=${{steps.target.outputs.target}}
2 changes: 1 addition & 1 deletion .github/workflows/workflow_test_benchmark.yml
@@ -80,7 +80,7 @@ jobs:
           # check and remove exited container
           cid=$(docker ps -a -q --filter "name=${TARGET}")
           if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-          docker run -tid -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -e http_proxy=${{ inputs.http_proxy }} -e https_proxy=${{ inputs.https_proxy }} --name="${TARGET}" --hostname="${TARGET}-container" ${TARGET}:latest
+          docker run -tid --privileged -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -e http_proxy=${{ inputs.http_proxy }} -e https_proxy=${{ inputs.https_proxy }} --name="${TARGET}" --hostname="${TARGET}-container" ${TARGET}:latest

      - name: Start Ray Cluster
        run: |
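Both CI containers above gain the --privileged flag. Going by the "add '--privileged' to docker run" and "set unlimited max lock memory for neural speed engine" commits in this PR, the likely motivation is allowing the neural-speed engine to lock model memory. The sketch below shows, under that assumption, a narrower way to get the same effect with an explicit memlock ulimit; it is an illustration, not what the workflows actually do.

```bash
# Sketch (assumption): --privileged is used so the container can raise RLIMIT_MEMLOCK
# for the neural-speed engine. A narrower alternative is an explicit ulimit on the
# container rather than full privileges:
docker run -tid --ulimit memlock=-1:-1 --name="${TARGET}" "${TARGET}:latest"

# Inside a privileged container the limit can also be lifted at runtime before
# starting Ray and the inference engine:
ulimit -l unlimited
```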
2 changes: 1 addition & 1 deletion .github/workflows/workflow_tests.yml
@@ -176,7 +176,7 @@ jobs:
         run: |
           TARGET=${{steps.target.outputs.target}}
           source dev/scripts/ci-functions.sh
-          strat_ray ${TARGET}
+          start_ray ${TARGET}

      - name: Run Tests
        run: |
6 changes: 6 additions & 0 deletions .gitignore
@@ -5,3 +5,9 @@ build/lib/
 *.json
 *.txt
 *.egg-info
+.eggs
+*.log
+*.so
+*.ninja_log
+build/
+runtime_outs/
19 changes: 18 additions & 1 deletion .pre-commit-config.yaml
@@ -7,6 +7,12 @@ repos:
     hooks:
       - id: ruff
         args: [ --fix, --exit-non-zero-on-fix, --ignore=E402, --ignore=E501, --ignore=E731, --ignore=F401]
+        exclude: |
+          (?x)^(
+              examples/inference/vllm/ray-vllm-examples/llm.py|
+              vllm-ext/vllm/extension/ns/__init__.py|
+          )$


   # Black needs to be ran after ruff with --fix
   - repo: https://github.com/psf/black
@@ -18,7 +24,18 @@ repos:
     rev: "v0.981"
     hooks:
       - id: mypy
-        exclude: tests
+        exclude: |
+          (?x)^(
+              tests|
+              vllm-ext/vllm/extension/ns/model/ns_loader.py|
+              vllm-ext/vllm/extension/ns/kv_cache/ns_cache.py|
+              vllm-ext/inference_engine/python/inference_engine/|
+              vllm-ext/setup.py|
+              examples/inference/vllm/ray-vllm-examples/llm.py|
+              llm_on_ray/inference/inference_config.py|
+              vllm-ext/vllm/extension/ns/
+          )

         additional_dependencies:
           - mypy-extensions
           - pydantic==1.10.0
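To confirm that the new ruff and mypy exclusions behave as intended before pushing, the hooks can be exercised locally. The commands below are a generic pre-commit invocation, not something added by this PR.

```bash
# Generic local check of the updated hooks (not part of this PR's changes):
pip install pre-commit
pre-commit run --all-files        # run every configured hook against the whole repo
pre-commit run mypy --all-files   # or re-run only the mypy hook to verify its excludes
```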