From 6cee70a8dd06af9012bdd7a5ed675ca0f7a60e61 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Fri, 8 Mar 2024 14:17:55 +0800
Subject: [PATCH 01/40] enable tests for ipex-llm

---
 .github/workflows/ipex-llm-nightly-test.yml  | 118 +++
 .github/workflows/ipex_llm_example_tests.yml |  80 ++
 .../workflows/ipex_llm_performance_tests.yml | 791 ++++++++++++++++++
 .github/workflows/ipex_llm_unit_tests.yml    | 398 +++++++++
 4 files changed, 1387 insertions(+)
 create mode 100644 .github/workflows/ipex-llm-nightly-test.yml
 create mode 100644 .github/workflows/ipex_llm_example_tests.yml
 create mode 100644 .github/workflows/ipex_llm_performance_tests.yml
 create mode 100644 .github/workflows/ipex_llm_unit_tests.yml

diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml
new file mode 100644
index 00000000000..68f96f9c263
--- /dev/null
+++ b/.github/workflows/ipex-llm-nightly-test.yml
@@ -0,0 +1,118 @@
+name: LLM Nightly Tests
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-nightly-test-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# Controls when the action will run.
+on:
+  schedule:
+    - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China
+  pull_request:
+    branches: [main]
+    # paths:
+    #   - ".github/workflows/llm-nightly-test.yml"
+    #   - ".github/actions/llm/setup-llm-env/action.yml"
+    #   - ".github/actions/llm/remove-llm-env/action.yml"
+    #   - ".github/actions/llm/convert-test/action.yml"
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # llm-cpp-build:
+  #   uses: ./.github/workflows/llm-binary-build.yml
+  llm-nightly-convert-test:
+    # needs: llm-cpp-build
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: windows
+            instruction: AVX-VNNI-UT
+            python-version: "3.9"
+          - os: ubuntu-20.04-lts
+            instruction: avx512
+            python-version: "3.9"
+    runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
+    env:
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - name: Set model directories
+        shell: bash
+        run: |
+          echo "ORIGIN_DIR=$(pwd)/../llm/origin-models" >> "$GITHUB_ENV"
+          echo "INT4_CKPT_DIR=$(pwd)/../llm/nightly-converted-models" >> "$GITHUB_ENV"
+      - name: Create model directories
+        shell: bash
+        run: |
+          if [ ! -d $ORIGIN_DIR ]; then
+            mkdir -p $ORIGIN_DIR
+          fi
+          if [ ! -d $INT4_CKPT_DIR ]; then
+            mkdir -p $INT4_CKPT_DIR
+          fi
+      - name: Set environment variables
+        shell: bash
+        run: |
+          echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV"
+          echo "GPTNEOX_ORIGIN_PATH=${ORIGIN_DIR}/gptneox-7b-redpajama-bf16" >> "$GITHUB_ENV"
+          echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloomz-7b1" >> "$GITHUB_ENV"
+          echo "STARCODER_ORIGIN_PATH=${ORIGIN_DIR}/gpt_bigcode-santacoder" >> "$GITHUB_ENV"
+
+          echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_q4_0.bin" >> "$GITHUB_ENV"
+          echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_gptneox_q4_0.bin" >> "$GITHUB_ENV"
+          echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_q4_0.bin" >> "$GITHUB_ENV"
+          echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_starcoder_q4_0.bin" >> "$GITHUB_ENV"
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools==58.0.4
+          python -m pip install --upgrade wheel
+
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Install BigDL-LLM
+      #   uses: ./.github/actions/llm/setup-llm-env
+
+      - name: Install IPEX-LLM from Pypi
+        shell: bash
+        run: |
+          pip install --pre --upgrade ipex-llm[all]
+
+      - name: Download original models & convert
+        uses: ./.github/actions/llm/convert-test
+
+      - name: Upload ckpt to ftp
+        shell: bash
+        if: runner.os == 'Linux' && github.event_name == 'schedule'
+        run: |
+          curl -T $LLAMA_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_llama_7b_q4_0.bin
+          curl -T $GPTNEOX_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_redpajama_7b_q4_0.bin
+          curl -T $BLOOM_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_bloom_7b_q4_0.bin
+          curl -T $STARCODER_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_santacoder_1b_q4_0.bin
+      - name: Delete ckpt
+        shell: bash
+        run: |
+          rm -rf $LLAMA_INT4_CKPT_PATH
+          rm -rf $GPTNEOX_INT4_CKPT_PATH
+          rm -rf $BLOOM_INT4_CKPT_PATH
+          rm -rf $STARCODER_INT4_CKPT_PATH
+
+  llm-unit-tests:
+    # needs: llm-cpp-build
+    uses: ./.github/workflows/ipex_llm_unit_tests.yml
+  llm-example-test:
+    # needs: llm-cpp-build
+    uses: ./.github/workflows/ipex_llm_example_tests.yml
diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml
new file mode 100644
index 00000000000..386a6255812
--- /dev/null
+++ b/.github/workflows/ipex_llm_example_tests.yml
@@ -0,0 +1,80 @@
+name: LLM Example Test
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-example-tests-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# Controls when the action will run.
+on:
+  # schedule:
+  #   - cron: '00 13 * * *' # GMT time, 13:00 GMT == 21:00 China
+  pull_request:
+    branches: [ main ]
+    # paths:
+    #   - '.github/workflows/llm_example_tests.yml'
+    #   - '.github/workflows/llm-binary-build.yml'
+    #   - '.github/actions/llm/example-test/action.yml'
+    #   - '.github/actions/llm/setup-llm-env/action.yml'
+    #   - '.github/actions/llm/remove-llm-env/action.yml'
+    #   - '.github/actions/llm/download-llm-binary/action.yml'
+    #   - 'python/llm/dev/test/run-example-tests.sh'
+    #   - 'python/llm/example/**'
+  workflow_dispatch:
+  workflow_call:
+
+env:
+  INT4_CKPT_DIR: ./llm/ggml-actions/stable
+  LLM_DIR: ./llm
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # llm-cpp-build:
+  #   uses: ./.github/workflows/llm-binary-build.yml
+  llm-example-test:
+    # needs: llm-cpp-build
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9"]
+        instruction: ["AVX512"]
+    runs-on: [ self-hosted, llm,"${{matrix.instruction}}", ubuntu-20.04-lts ]
+    env:
+      THREAD_NUM: 24
+    steps:
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools==58.0.4
+          python -m pip install --upgrade wheel
+
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Run LLM install (all) test
+      #   uses: ./.github/actions/llm/setup-llm-env
+      #   env:
+      #     ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      - name: Install IPEX-LLM from Pypi
+        shell: bash
+        run: |
+          pip install --pre --upgrade ipex-llm[all]
+
+      - name: Run LLM example test
+        uses: ./.github/actions/llm/example-test
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      # - name: Clean up test environment
+      #   uses: ./.github/actions/llm/remove-llm-env
+      #   env:
+      #     ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
new file mode 100644
index 00000000000..68b9cbbd2cc
--- /dev/null
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -0,0 +1,791 @@
+name: LLM Performance Test
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-performance-tests-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# Controls when the action will run.
+on:
+  schedule:
+    - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
+  # please uncomment it for PR tests
+  pull_request:
+    branches: [main]
+    # paths:
+    #   - ".github/workflows/llm_performance_tests.yml"
+    #   - "python/llm/test/benchmark/**"
+    #   - "python/llm/dev/benchmark/all-in-one/**"
+  workflow_dispatch:
+  workflow_call:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # llm-cpp-build: # please uncomment it for PR tests
+  #   uses: ./.github/workflows/llm-binary-build.yml
+
+  llm-performance-test-on-arc:
+    # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
+    # needs: llm-cpp-build # please uncomment it for PR tests
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9"]
+    runs-on: [self-hosted, llm, perf]
+    env:
+      OMP_NUM_THREADS: 16
+      THREAD_NUM: 16
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+      CSV_SAVE_PATH: ${{ github.event.schedule && '/mnt/disk1/nightly_perf_gpu/' || '/mnt/disk1/pr_perf_gpu/' }}
+
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        # pip install transformers_stream_generator for model internlm-chat-7b-8k
+        # pip install tiktoken for model Qwen-7B-Chat-10-12
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip install --upgrade omegaconf
+          python -m pip install --upgrade pandas
+          python -m pip install --upgrade einops
+          python -m pip install --upgrade transformers_stream_generator
+          python -m pip install --upgrade tiktoken
+
+      # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Run LLM install (all) test
+      #   uses: ./.github/actions/llm/setup-llm-env
+      #   with:
+      #     extra-dependency: "xpu_2.1"
+
+      - name: Install BigDL-LLM from Pypi
+        shell: bash
+        run: |
+          pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+          # test_version_date=`date -d 'yesterday' '+%Y%m%d'`
+          # if ! pip show bigdl-llm | grep $test_version_date; then
+          #   echo "Did not install bigdl-llm with expected version $test_version_date"
+          #   exit 1
+          # fi
+
+      - name: Test installed xpu version
+        shell: bash
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          bash python/llm/test/run-llm-install-tests.sh
+
+      - name: Test on xpu (transformers==4.31.0)
+        shell: bash
+        run: |
+          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
+          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py
+
+          source /opt/intel/oneapi/setvars.sh
+          export USE_XETLA=OFF
+          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+          cd python/llm/dev/benchmark/all-in-one
+          # hide time info
+          sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+          # change csv name
+          sed -i 's/{today}/{today}_test1/g' run.py
+          python run.py
+
+      - name: Test on xpu (transformers==4.34.0)
+        shell: bash
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          export USE_XETLA=OFF
+          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          # upgrade transformers for model Mistral-7B-v0.1
+          python -m pip install transformers==4.34.0
+          cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+          cd python/llm/dev/benchmark/all-in-one
+          # change csv name
+          sed -i 's/test1/test2/g' run.py
+          python run.py
+
+      - name: Test on xpu (transformers==4.37.0)
+        shell: bash
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          export USE_XETLA=OFF
+          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          # upgrade transformers for model Qwen/Qwen1.5-7B-Chat
+          python -m pip install transformers==4.37.0
+          cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+          cd python/llm/dev/benchmark/all-in-one
+          # change csv name
+          sed -i 's/test2/test3/g' run.py
+          python run.py
+
+      - name: Concat csv and generate html
+        shell: bash
+        run: |
+          cd python/llm/dev/benchmark/all-in-one
+          python ../../../test/benchmark/concat_csv.py
+          for file in *.csv; do
+            if [[ $file != *test* ]]; then
+              cp "$file" $CSV_SAVE_PATH
+            fi
+          done
+          python -m pip install pandas==1.5.3
+          cd ../../../test/benchmark
+          python csv_to_html.py -f $CSV_SAVE_PATH
+
+      - name: Check and upload results to ftp
+        shell: bash
+        run: |
+          cd python/llm/dev/benchmark/all-in-one
+          python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
+          python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
+          find . -name "*test*.csv" -delete
-name "*test*.csv" -delete + if [ ${{ github.event.schedule}} ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + fi + + llm-performance-test-on-spr: + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests + strategy: + fail-fast: false + matrix: + python-version: ["3.9"] + runs-on: [self-hosted, llm, spr01-perf] + env: + OMP_NUM_THREADS: 16 + THREAD_NUM: 16 + ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + steps: + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade wheel + python -m pip install --upgrade omegaconf + python -m pip install --upgrade pandas + python -m pip install --upgrade einops + python -m pip install --upgrade tiktoken + python -m pip install --upgrade transformers_stream_generator + + # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + + - name: Install BigDL-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show bigdl-llm | grep $test_version_date; then + # echo "Did not install bigdl-llm with excepted version $test_version_date" + # exit 1 + # fi + + - name: Test on cpu + shell: bash + run: | + date_for_test_version=$(date -d yesterday +%Y-%m-%d) + sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + export http_proxy=${HTTP_PROXY} + export https_proxy=${HTTPS_PROXY} + source bigdl-llm-init -t + export OMP_NUM_THREADS=48 + # hide time info + sed -i 's/str(end - st)/"xxxxxx"/g' run.py + python run.py + cp ./*.csv /models/nightly_perf_cpu + cd ../../../test/benchmark + python -m pip install pandas==1.5.3 + python csv_to_html.py -f /models/nightly_perf_cpu + cd /models/nightly_perf_cpu + for f in *.html; do + curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/ + done + + llm-performance-test-on-core: + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests + strategy: + fail-fast: false + matrix: + include: + - os: windows + platform: dp + python-version: "3.9" + # - os: windows + # platform: lp + # python-version: "3.9" + runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"] + env: + ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/ + steps: + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: 
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip install --upgrade omegaconf pandas
+          python -m pip install --upgrade tiktoken einops transformers_stream_generator
+
+      # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Run LLM install (all) test
+      #   uses: ./.github/actions/llm/setup-llm-env
+
+      - name: Install BigDL-LLM from Pypi
+        shell: bash
+        run: |
+          pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu
+          # test_version_date=`date -d 'yesterday' '+%Y%m%d'`
+          # if ! pip show bigdl-llm | grep $test_version_date; then
+          #   echo "Did not install bigdl-llm with expected version $test_version_date"
+          #   exit 1
+          # fi
+
+      - name: Test on core ${{ matrix.platform }}
+        shell: bash
+        run: |
+          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
+          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py
+
+          mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+          cd python/llm/dev/benchmark/all-in-one
+          export http_proxy=${HTTP_PROXY}
+          export https_proxy=${HTTPS_PROXY}
+          # hide time info
+          sed -i 's/str(end - st)/"xxxxxx"/g' run.py
+          python run.py
+          cp ./*.csv $CSV_SAVE_PATH
+          cd ../../../test/benchmark
+          python -m pip install pandas==1.5.3
+          python csv_to_html.py -f $CSV_SAVE_PATH
+          cd ../../dev/benchmark/all-in-one/
+          if [ ${{ github.event.schedule}} ]; then
+            curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/
+          fi
+
+  llm-performance-test-on-igpu:
+    # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
+    # needs: llm-cpp-build # please uncomment it for PR tests
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: windows
+            python-version: "3.9"
+    runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu]
+    env:
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+
+      # TODO: Put the bigdl-llm related install process for win gpu into an action function
+
+      # Please uncomment it and comment the install from pypi for PR tests
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Prepare for install bigdl-llm from source
+      #   shell: bash
+      #   run: |
+      #     sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py
+      #     sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py
+
+      # - name: Install bigdl-llm and other related packages (install from source)
+      #   shell: cmd
+      #   run: |
+      #     call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
+      #     call conda activate igpu-perf
+
+      #     pip install --upgrade pip
+      #     pip install --upgrade wheel
+      #     pip install --upgrade omegaconf pandas
+      #     pip install --upgrade tiktoken einops transformers_stream_generator
+
+      #     cd python\llm
+      #     python setup.py clean --all bdist_wheel --win
+      #     if not exist dist\bigdl_llm*.whl (exit /b 1)
+      #     for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i
+
+      #     pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+      #     if %ERRORLEVEL% neq 0 (exit /b 1)
+      #     pip list
+
+      #     call conda deactivate
+
+      - name: Determine desired bigdl-llm version
+        shell: bash
+        run: |
+          test_version_date=`date -d 'yesterday' '+%Y%m%d'`
+          echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV"
+
+      - name: Install bigdl-llm and other related packages (install from pypi)
+        shell: cmd
+        run: |
+          call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
+          call conda activate igpu-perf
+
+          pip install --upgrade pip
+          pip install --upgrade wheel
+          pip install --upgrade omegaconf pandas
+          pip install --upgrade tiktoken einops transformers_stream_generator
+
+          pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+          # pip show bigdl-llm | findstr %TEST_VERSION_DATE%
+          # if %ERRORLEVEL% neq 0 (
+          #   echo "Did not install bigdl-llm with expected version %TEST_VERSION_DATE%"
+          #   exit /b 1
+          # )
+          pip list
+
+          call conda deactivate
+
+      - name: Create env for html generation
+        shell: cmd
+        run: |
+          call conda create -n html-gen python=3.9 -y
+          call conda activate html-gen
+
+          pip install pandas==1.5.3
+          pip install Jinja2
+
+          call conda deactivate
+
+      - name: Set directory envs & fix generated csv date name
+        shell: bash
+        run: |
+          if [ ${{ github.event_name }} == 'schedule' ]; then
+            echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV"
+          else
+            echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV"
+          fi
+          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
+          echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV"
+
+          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py
+
+      - name: Prepare igpu perf test (32-32)
+        shell: bash
+        run: |
+          # hide time info
+          # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml
+
+      - name: Test on igpu (32-32)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Mistral (32-32)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml
+
+      - name: Test on igpu for Mistral (32-32)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.34.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Qwen1.5 (32-32)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml
+
+      - name: Test on igpu for Qwen1.5 (32-32)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.37.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Concat csv and generate html (32-32)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\32-32\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # TODO: create an action function here for different input
+      # 1024-128
+      - name: Prepare igpu perf test (1024-128)
+        shell: bash
+        run: |
+          sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml
+
+      - name: Test on igpu (1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.31.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Mistral (1024-128)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml
+
+      - name: Test on igpu for Mistral (1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.34.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Qwen 1.5 (1024-128)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml
+
+      - name: Test on igpu for Qwen 1.5 (1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.37.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Concat csv and generate html (1024-128)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\1024-128\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # 2048-256
+      - name: Prepare igpu perf test (2048-256)
+        shell: bash
+        run: |
+          sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml
+
+      - name: Test on igpu (2048-256)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.31.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Mistral (2048-256)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml
+
+      - name: Test on igpu for Mistral (2048-256)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.34.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Qwen 1.5 (2048-256)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml
+
+      - name: Test on igpu for Qwen 1.5 (2048-256)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.37.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Concat csv and generate html (2048-256)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\2048-256\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      # load_low_bit 1024-128
+      - name: Prepare igpu perf test (load_low_bit 1024-128)
+        shell: bash
+        run: |
+          sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml
+
+      - name: Test on igpu (load_low_bit 1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.31.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+          REM for llava
+          set TRANSFORMERS_OFFLINE=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml
+
+      - name: Test on igpu for Mistral (load_low_bit 1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.34.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128)
+        shell: bash
+        run: |
+          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml
+
+      - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128)
+        shell: cmd
+        run: |
+          call conda activate igpu-perf
+          pip install transformers==4.37.0
+
+          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
+          set SYCL_CACHE_PERSISTENT=1
+          set BIGDL_LLM_XMX_DISABLED=1
+
+          cd python\llm\dev\benchmark\all-in-one
+          move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml
+          set PYTHONIOENCODING=utf-8
+          python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+
+          call conda deactivate
+
+      - name: Concat csv and generate html (load_low_bit 1024-128)
+        shell: cmd
+        run: |
+          call conda activate html-gen
+
+          cd python\llm\dev\benchmark\all-in-one
+          python ..\..\..\test\benchmark\concat_csv.py
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          del /q *test*.csv
+          move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\
+          cd ..\..\..\test\benchmark
+          python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\
+          if %ERRORLEVEL% neq 0 (exit /b 1)
+          move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH%
+
+          call conda deactivate
+
+      - name: Upload results to ftp
+        if: ${{ always() }}
+        shell: cmd
+        run: |
+          cd %CSV_SAVE_PATH%
+          IF "${{ github.event_name }}"=="schedule" (
+            for %%f in (*.html) do (
+              curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
+            )
+          )
+      # for test on machine when encountering error
+      # - name: Remove conda env
+      #   if: ${{ always() }}
+      #   shell: cmd
+      #   run: |
+      #     call conda env remove -n igpu-perf -y
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
new file mode 100644
index 00000000000..3c3b1c233ce
--- /dev/null
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -0,0 +1,398 @@
+name: LLM Unit Tests
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-unittest-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# Controls when the action will run.
+on:
+  # Triggers the workflow on push or pull request events but only for the main branch
+  push:
+    branches: [main]
+    paths:
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests.yml"
+      - ".github/workflows/llm-binary-build.yml"
+      - ".github/actions/llm/setup-llm-env/action.yml"
+      - ".github/actions/llm/remove-llm-env/action.yml"
+      - ".github/actions/llm/cli-test-linux/action.yml"
+      - ".github/actions/llm/cli-test-windows/action.yml"
+      - ".github/actions/llm/download-llm-binary/action.yml"
+  pull_request:
+    branches: [main]
+    # paths:
+    #   - "python/llm/**"
+    #   - ".github/workflows/llm_unit_tests.yml"
+    #   - ".github/workflows/llm-binary-build.yml"
+    #   - ".github/actions/llm/setup-llm-env/action.yml"
+    #   - ".github/actions/llm/remove-llm-env/action.yml"
+    #   - ".github/actions/llm/cli-test-linux/action.yml"
+    #   - ".github/actions/llm/cli-test-windows/action.yml"
+    #   - ".github/actions/llm/download-llm-binary/action.yml"
+  workflow_dispatch:
+  workflow_call:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # llm-cpp-build:
+  #   uses: ./.github/workflows/llm-binary-build.yml
+  setup-python-version:
+    runs-on: ubuntu-latest
+    outputs:
+      python-version: ${{ steps.setup-python-version.outputs.python-version }}
+    steps:
+      - name: setup-python-version
+        id: setup-python-version
+        run: |
+          if [ ${{ github.event_name }} == 'schedule' ]; then
+            python_version='["3.9", "3.10", "3.11"]'
+          else
+            python_version='["3.9"]'
+          fi
+          list=$(echo ${python_version} | jq -c)
+          echo "python-version=${list}" >> "$GITHUB_OUTPUT"
+  llm-unit-test:
+    needs: [setup-python-version]
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [windows, ubuntu-20.04-lts]
+        python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
+        include:
+          - os: windows
+            instruction: AVX-VNNI-UT
+          - os: ubuntu-20.04-lts
+            instruction: avx512
+    runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
+    env:
+      THREAD_NUM: 24
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - name: Set model directories
+        shell: bash
+        run: |
+          echo "DATASET_DIR=${{ github.workspace }}/../llm/datasets" >> "$GITHUB_ENV"
+          echo "ORIGIN_DIR=${{ github.workspace }}/../llm/origin-models" >> "$GITHUB_ENV"
+          echo "INT4_CKPT_DIR=${{ github.workspace }}/../llm/converted-models" >> "$GITHUB_ENV"
+      - name: Create model directories
+        shell: bash
+        run: |
+          if [ ! -d $DATASET_DIR ]; then
+            mkdir -p $DATASET_DIR
+          fi
+          if [ ! -d $ORIGIN_DIR ]; then
+            mkdir -p $ORIGIN_DIR
+          fi
+          if [ ! -d $INT4_CKPT_DIR ]; then
+            mkdir -p $INT4_CKPT_DIR
+          fi
+      - name: Set environment variables
+        shell: bash
+        run: |
+          echo "SPEECH_DATASET_PATH=${DATASET_DIR}/librispeech_asr_dummy" >> "$GITHUB_ENV"
+          echo "COMMON_VOICE_PATH=${DATASET_DIR}/common_voice" >> "$GITHUB_ENV"
+
+          echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV"
+          echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloom-7b1" >> "$GITHUB_ENV"
+          echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
+          echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV"
+          echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
+          echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV"
+          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
+
+          echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_7b_q4_0.bin" >> "$GITHUB_ENV"
+          echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_redpajama_7b_q4_0.bin" >> "$GITHUB_ENV"
+          echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_7b_q4_0.bin" >> "$GITHUB_ENV"
+          echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_santacoder_1b_q4_0.bin" >> "$GITHUB_ENV"
+          echo "CHATGLM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/chatglm2-6b-q4_0.bin" >> "$GITHUB_ENV"
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools==58.0.4
+          python -m pip install --upgrade wheel
+
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Run LLM install (all) test
+      #   uses: ./.github/actions/llm/setup-llm-env
+
+      - name: Install IPEX-LLM from Pypi
+        shell: bash
+        run: |
+          pip install --pre --upgrade ipex-llm[all]
+
+      - name: Download ckpt & original models
+        shell: bash
+        run: |
+          if [ ! -e $LLAMA_INT4_CKPT_PATH ]; then
+            echo "Directory $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            echo "wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/bigdl_llm_llama_7b_q4_0.bin -P $INT4_CKPT_DIR"
+            wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/bigdl_llm_llama_7b_q4_0.bin -P $INT4_CKPT_DIR
+          fi
+          if [ ! -e $GPTNEOX_INT4_CKPT_PATH ]; then
+            echo "Directory $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin -P $INT4_CKPT_DIR
+          fi
+          if [ ! -e $BLOOM_INT4_CKPT_PATH ]; then
+            echo "Directory $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin -P $INT4_CKPT_DIR
+          fi
+          if [ ! -e $STARCODER_INT4_CKPT_PATH ]; then
+            echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin -P $INT4_CKPT_DIR
+          fi
+          # if [ ! -e $CHATGLM_INT4_CKPT_PATH ]; then
+          #   echo "Directory $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
+          #   wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/chatglm2-6b-q4_0.bin -P $INT4_CKPT_DIR
+          # fi
+          if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
+            echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
+ echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR + fi + if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then + echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR + fi + if [ ! -d $ORIGINAL_WHISPER_TINY_PATH ]; then + echo "Directory $ORIGINAL_WHISPER_TINY_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-tiny -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-tiny -P $ORIGIN_DIR + fi + if [ ! -d $MISTRAL_ORIGIN_PATH ]; then + echo "Directory $MISTRAL_ORIGIN_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Mistral-7B-v0.1 -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Mistral-7B-v0.1 -P $ORIGIN_DIR + fi + if [ ! -d $LLAMA_ORIGIN_PATH ]; then + echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..." + echo "wget --no-verbose $LLM_FTP_URL/llm/llama-7b-hf -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/llama-7b-hf -P $ORIGIN_DIR + fi + if [ ! -d $BLOOM_ORIGIN_PATH ]; then + echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..." + echo "wget --no-verbose $LLM_FTP_URL/llm/bloom-7b1 -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/bloom-7b1 -P $ORIGIN_DIR + fi + if [ ! -d $SPEECH_DATASET_PATH ]; then + echo "Directory $SPEECH_DATASET_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/librispeech_asr_dummy -P $DATASET_DIR" + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/librispeech_asr_dummy -P $DATASET_DIR + fi + if [ ! -d $COMMON_VOICE_PATH ]; then + echo "Directory $COMMON_VOICE_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR" + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/common_voice -P $DATASET_DIR + fi + if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then + echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..." 
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
+          fi
+
+      - name: Run LLM cli test (Linux)
+        if: runner.os == 'Linux'
+        uses: ./.github/actions/llm/cli-test-linux
+      - name: Run LLM cli test (Windows)
+        if: runner.os == 'Windows'
+        uses: ./.github/actions/llm/cli-test-windows
+      - name: Run LLM inference test
+        shell: bash
+        run: |
+          python -m pip install einops datasets librosa openai-whisper
+          bash python/llm/test/run-llm-inference-tests.sh
+      - name: Run LLM langchain test
+        shell: bash
+        run: |
+          pip install -U langchain==0.0.184
+          pip install -U chromadb==0.3.25
+          pip install -U pandas==2.0.3
+          bash python/llm/test/run-llm-langchain-tests.sh
+      - name: Run LLM llamaindex test
+        shell: bash
+        run: |
+          pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
+          pip install transformers==4.31.0
+          bash python/llm/test/run-llm-llamaindex-tests.sh
+  llm-unit-test-on-arc:
+    needs: [setup-python-version]
+    strategy:
+      fail-fast: false
+      matrix:
+        pytorch-version: ['2.1', '2.0']
+        python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
+    runs-on: [self-hosted, llm, arc-ut]
+    env:
+      # OMP_NUM_THREADS: 16
+      # THREAD_NUM: 16
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+    steps:
+      - name: Set environment variables
+        shell: bash
+        run: |
+          echo "DATASET_DIR=${ORIGIN_DIR}/../datasets" >> "$GITHUB_ENV"
+          echo "ABIRATE_ENGLISH_QUOTES_PATH=${ORIGIN_DIR}/../datasets/abirate_english_quotes" >> "$GITHUB_ENV"
+          echo "SPEECH_DATASET_PATH=${ORIGIN_DIR}/../datasets/librispeech_asr_dummy" >> "$GITHUB_ENV"
+
+          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
+          echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV"
+          echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV"
+          echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR}/mpt-7b-chat" >> "$GITHUB_ENV"
+          echo "WHISPER_TINY_ORIGIN_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV"
+
+          echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV"
+          echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV"
+          echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV"
+      - name: Checkout repo
+        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools
+          python -m pip install --upgrade wheel
+
+      # - name: Download llm binary
+      #   uses: ./.github/actions/llm/download-llm-binary
+
+      # - name: Install BigDL-LLM for xpu
+      #   uses: ./.github/actions/llm/setup-llm-env
+      #   with:
+      #     extra-dependency: "xpu_${{ matrix.pytorch-version }}"
+
+      - name: Install IPEX-LLM from Pypi
+        shell: bash
+        run: |
+          export xpu_version=xpu_${{ matrix.pytorch-version }}
+          pip install --pre --upgrade ipex-llm[$xpu_version] -f https://developer.intel.com/ipex-whl-stable-xpu
+
+      - name: Test installed xpu version
+        shell: bash
+        run: |
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          bash python/llm/test/run-llm-install-tests.sh
+
+      - name: Download LLMs and datasets
+        shell: bash
+        run: |
+          if [ ! -d $LLAMA2_7B_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
+          fi
+          if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then
+            echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR
+          fi
+          if [ ! -d $FALCON_7B_ORIGIN_PATH ]; then
+            echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/falcon-7b-instruct-with-patch -P $ORIGIN_DIR
+          fi
+          if [ ! -d $MPT_7B_ORIGIN_PATH ]; then
+            echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR
+          fi
+          if [ ! -d $WHISPER_TINY_ORIGIN_PATH ]; then
+            echo "Directory $WHISPER_TINY_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/whisper-tiny -P $ORIGIN_DIR
+          fi
+          if [ ! -d $DATASET_DIR ]; then
+            mkdir -p $DATASET_DIR
+          fi
+          if [ ! -d $ABIRATE_ENGLISH_QUOTES_PATH ]; then
+            echo "Directory $ABIRATE_ENGLISH_QUOTES_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/abirate_english_quotes -P $DATASET_DIR
+          fi
+          if [ ! -d $SPEECH_DATASET_PATH ]; then
+            echo "Directory $SPEECH_DATASET_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/librispeech_asr_dummy -P $DATASET_DIR
+          fi
+          if [ ! -d $MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH ]; then
+            echo "Directory $MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Mistral-7B-Instruct-v0.1 -P $ORIGIN_DIR
+          fi
+          if [ ! -d $QWEN_7B_ORIGIN_PATH ]; then
+            echo "Directory $QWEN_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Qwen-7B-Chat -P $ORIGIN_DIR
+          fi
+          if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then
+            echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR
+          fi
+
+      - name: Run LLM inference test
+        shell: bash
+        run: |
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator
+          bash python/llm/test/run-llm-inference-tests-gpu.sh
+          python -m pip install transformers==4.34.0
+          bash python/llm/test/run-llm-inference-tests-gpu-434.sh
+
+      - name: Run LLM example tests
+        shell: bash
+        run: |
+          python -m pip uninstall datasets -y
+          python -m pip install transformers==4.34.0 datasets peft==0.5.0 accelerate==0.23.0
+          python -m pip install bitsandbytes scipy
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          bash python/llm/test/run-llm-example-tests-gpu.sh
+
+      - name: Run LLM langchain GPU test
+        shell: bash
+        run: |
+          pip install -U langchain==0.0.184
+          pip install -U chromadb==0.3.25
+          pip install -U pandas==2.0.3
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          bash python/llm/test/run-llm-langchain-tests-gpu.sh
+
+      - name: Run LLM llamaindex GPU test
+        shell: bash
+        run: |
+          pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface
+          # Specific oneapi position on arc ut test machines
+          if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then
+            pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+            source /opt/intel/oneapi/setvars.sh
+          elif [[ '${{ matrix.pytorch-version }}' == '2.0' ]]; then
+            pip install --pre --upgrade bigdl-llm[xpu_2.0] -f https://developer.intel.com/ipex-whl-stable-xpu
+            source /home/arda/intel/oneapi/setvars.sh
+          fi
+          bash python/llm/test/run-llm-llamaindex-tests-gpu.sh
\ No newline at end of file

From 9f145aaf21c468e4aa123a31a4daab44051f2722 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Fri, 8 Mar 2024 23:50:02 +0800
Subject: [PATCH 02/40] checkout analytics-zoo/bigdl-llm-internal

---
 .github/workflows/ipex-llm-nightly-test.yml  |  8 ++++
 .github/workflows/ipex_llm_example_tests.yml | 26 ++++++++-----
 .../workflows/ipex_llm_performance_tests.yml | 37 ++++++++++++++++--
 .github/workflows/ipex_llm_unit_tests.yml    | 38 +++++++++++++------
 4 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml
index 68f96f9c263..3eedb0d2ed5 100644
--- a/.github/workflows/ipex-llm-nightly-test.yml
+++ b/.github/workflows/ipex-llm-nightly-test.yml
@@ -68,7 +68,15 @@ jobs:
           echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_gptneox_q4_0.bin" >> "$GITHUB_ENV"
           echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_q4_0.bin" >> "$GITHUB_ENV"
           echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_starcoder_q4_0.bin" >> "$GITHUB_ENV"
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml
index 386a6255812..05b5bcfa888 100644
--- a/.github/workflows/ipex_llm_example_tests.yml
+++ b/.github/workflows/ipex_llm_example_tests.yml
@@ -14,15 +14,15 @@ on:
   # schedule:
   #   - cron: '00 13 * * *' # GMT time, 13:00 GMT == 21:00 China
   pull_request:
     branches: [ main ]
-    # paths:
-    #   - '.github/workflows/llm_example_tests.yml'
-    #   - '.github/workflows/llm-binary-build.yml'
-    #   - '.github/actions/llm/example-test/action.yml'
-    #   - '.github/actions/llm/setup-llm-env/action.yml'
-    #   - '.github/actions/llm/remove-llm-env/action.yml'
-    #   - '.github/actions/llm/download-llm-binary/action.yml'
-    #   - 'python/llm/dev/test/run-example-tests.sh'
-    #   - 'python/llm/example/**'
+    paths:
+      - '.github/workflows/llm_example_tests.yml'
+      - '.github/workflows/llm-binary-build.yml'
+      - '.github/actions/llm/example-test/action.yml'
+      - '.github/actions/llm/setup-llm-env/action.yml'
+      - '.github/actions/llm/remove-llm-env/action.yml'
+      - '.github/actions/llm/download-llm-binary/action.yml'
+      - 'python/llm/dev/test/run-example-tests.sh'
+      - 'python/llm/example/**'
   workflow_dispatch:
   workflow_call:
@@ -45,7 +45,15 @@ jobs:
     env:
       THREAD_NUM: 24
     steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index 68b9cbbd2cc..7136f98b015 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -13,8 +13,8 @@ on:
   schedule:
     - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
   # please uncomment it for PR tests
-  pull_request:
-    branches: [main]
+  # pull_request:
+  #   branches: [main]
     # paths:
     #   - ".github/workflows/llm_performance_tests.yml"
     #   - "python/llm/test/benchmark/**"
     #   - "python/llm/dev/benchmark/all-in-one/**"
   workflow_dispatch:
   workflow_call:
@@ -42,7 +42,15 @@ jobs:
       CSV_SAVE_PATH: ${{ github.event.schedule && '/mnt/disk1/nightly_perf_gpu/' || '/mnt/disk1/pr_perf_gpu/' }}

     steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"

       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
@@ -170,8 +178,15 @@ jobs:
       THREAD_NUM: 16
       ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
     steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
-
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
@@ -247,7 +262,15 @@ jobs:
     env:
       ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
       CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/
     steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"

       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -314,7 +337,15 @@ jobs:
     env:
       ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
     steps:
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"

       # TODO: Put the bigdl-llm related install process for win gpu into an action function
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 3c3b1c233ce..80868f7ded9 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -24,15 +24,15 @@ on:
       - ".github/actions/llm/cli-test-windows/action.yml"
       - ".github/actions/llm/download-llm-binary/action.yml"
   pull_request:
     branches: [main]
-    # paths:
-    #   - "python/llm/**"
-    #   - ".github/workflows/llm_unit_tests.yml"
-    #   - ".github/workflows/llm-binary-build.yml"
-    #   - ".github/actions/llm/setup-llm-env/action.yml"
-    #   - ".github/actions/llm/remove-llm-env/action.yml"
-    #   - ".github/actions/llm/cli-test-linux/action.yml"
-    #   - ".github/actions/llm/cli-test-windows/action.yml"
-    #   - ".github/actions/llm/download-llm-binary/action.yml"
+    paths:
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests.yml"
+      - ".github/workflows/llm-binary-build.yml"
+      - ".github/actions/llm/setup-llm-env/action.yml"
+      - ".github/actions/llm/remove-llm-env/action.yml"
+      - ".github/actions/llm/cli-test-linux/action.yml"
+      - ".github/actions/llm/cli-test-windows/action.yml"
+      - ".github/actions/llm/download-llm-binary/action.yml"
   workflow_dispatch:
   workflow_call:
@@ -109,7 +109,15 @@ jobs:
           echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_7b_q4_0.bin" >> "$GITHUB_ENV"
           echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_santacoder_1b_q4_0.bin" >> "$GITHUB_ENV"
           echo "CHATGLM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/chatglm2-6b-q4_0.bin" >> "$GITHUB_ENV"
+      - name: Set access token
+        run: |
+          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
       - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+        with:
+          repository: "analytics-zoo/bigdl-llm-internal"
+          ref: "ipex-llm-20240308"
+          token: ${{ env.github_access_token }}
+          submodules: "recursive"
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
@@ -254,9 +262,15 @@ jobs:
           echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV"
           echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV"
           echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV"
-      - name: Checkout repo
-        uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
-
+      - name: Set access token
+        run: |
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + with: + repository: "analytics-zoo/bigdl-llm-internal" + ref: "ipex-llm-20240308" + token: ${{ env.github_access_token }} + submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From 27da1782cd053b85a57a3b95f2a4fade76a5e486 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Sun, 10 Mar 2024 12:09:23 +0800 Subject: [PATCH 03/40] echo --- .github/workflows/ipex-llm-nightly-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml index 3eedb0d2ed5..4d4d9bc0977 100644 --- a/.github/workflows/ipex-llm-nightly-test.yml +++ b/.github/workflows/ipex-llm-nightly-test.yml @@ -71,6 +71,7 @@ jobs: - name: Set access token run: | echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" + echo ${{ env.github_access_token }} - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" From 24ef2d19c90abe853c12db29b40ee2dab88e59c4 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Sun, 10 Mar 2024 12:10:51 +0800 Subject: [PATCH 04/40] update --- .github/workflows/ipex-llm-nightly-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml index 4d4d9bc0977..32ae4d4578e 100644 --- a/.github/workflows/ipex-llm-nightly-test.yml +++ b/.github/workflows/ipex-llm-nightly-test.yml @@ -71,7 +71,7 @@ jobs: - name: Set access token run: | echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" - echo ${{ env.github_access_token }} + echo $GITHUB_ACCESS_TOKEN - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" From 27ca99847cf2a063c5a04dedbac71934a03d28cd Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Sun, 10 Mar 2024 12:28:08 +0800 Subject: [PATCH 05/40] update --- .github/workflows/ipex-llm-nightly-test.yml | 95 -------------------- .github/workflows/ipex_llm_example_tests.yml | 2 +- .github/workflows/ipex_llm_unit_tests.yml | 2 +- 3 files changed, 2 insertions(+), 97 deletions(-) diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml index 32ae4d4578e..e2f5b38fc9a 100644 --- a/.github/workflows/ipex-llm-nightly-test.yml +++ b/.github/workflows/ipex-llm-nightly-test.yml @@ -24,101 +24,6 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # llm-cpp-build: - # uses: ./.github/workflows/llm-binary-build.yml - llm-nightly-convert-test: - # needs: llm-cpp-build - strategy: - fail-fast: false - matrix: - include: - - os: windows - instruction: AVX-VNNI-UT - python-version: "3.9" - - os: ubuntu-20.04-lts - instruction: avx512 - python-version: "3.9" - runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"] - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - steps: - - name: Set model directories - shell: bash - run: | - echo "ORIGIN_DIR=$(pwd)/../llm/origin-models" >> "$GITHUB_ENV" - echo "INT4_CKPT_DIR=$(pwd)/../llm/nightly-converted-models" >> "$GITHUB_ENV" - - name: Create model directories - shell: bash - run: | - if [ ! -d $ORIGIN_DIR ]; then - mkdir -p $ORIGIN_DIR - fi - if [ ! 
From 27ca99847cf2a063c5a04dedbac71934a03d28cd Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 12:28:08 +0800
Subject: [PATCH 05/40] update

---
 .github/workflows/ipex-llm-nightly-test.yml | 95 --------------------
 .github/workflows/ipex_llm_example_tests.yml | 2 +-
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 3 files changed, 2 insertions(+), 97 deletions(-)

diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml
index 32ae4d4578e..e2f5b38fc9a 100644
--- a/.github/workflows/ipex-llm-nightly-test.yml
+++ b/.github/workflows/ipex-llm-nightly-test.yml
@@ -24,101 +24,6 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
- # llm-cpp-build:
- # uses: ./.github/workflows/llm-binary-build.yml
- llm-nightly-convert-test:
- # needs: llm-cpp-build
- strategy:
- fail-fast: false
- matrix:
- include:
- - os: windows
- instruction: AVX-VNNI-UT
- python-version: "3.9"
- - os: ubuntu-20.04-lts
- instruction: avx512
- python-version: "3.9"
- runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
- env:
- ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
- steps:
- - name: Set model directories
- shell: bash
- run: |
- echo "ORIGIN_DIR=$(pwd)/../llm/origin-models" >> "$GITHUB_ENV"
- echo "INT4_CKPT_DIR=$(pwd)/../llm/nightly-converted-models" >> "$GITHUB_ENV"
- - name: Create model directories
- shell: bash
- run: |
- if [ ! -d $ORIGIN_DIR ]; then
- mkdir -p $ORIGIN_DIR
- fi
- if [ ! -d $INT4_CKPT_DIR ]; then
- mkdir -p $INT4_CKPT_DIR
- fi
- - name: Set environment variables
- shell: bash
- run: |
- echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV"
- echo "GPTNEOX_ORIGIN_PATH=${ORIGIN_DIR}/gptneox-7b-redpajama-bf16" >> "$GITHUB_ENV"
- echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloomz-7b1" >> "$GITHUB_ENV"
- echo "STARCODER_ORIGIN_PATH=${ORIGIN_DIR}/gpt_bigcode-santacoder" >> "$GITHUB_ENV"
-
- echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_q4_0.bin" >> "$GITHUB_ENV"
- echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_gptneox_q4_0.bin" >> "$GITHUB_ENV"
- echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_q4_0.bin" >> "$GITHUB_ENV"
- echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_starcoder_q4_0.bin" >> "$GITHUB_ENV"
- - name: Set access token
- run: |
- echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- echo $GITHUB_ACCESS_TOKEN
- - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
- with:
- repository: "analytics-zoo/bigdl-llm-internal"
- ref: "ipex-llm-20240308"
- token: ${{ env.github_access_token }}
- submodules: "recursive"
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- shell: bash
- run: |
- python -m pip install --upgrade pip
- python -m pip install --upgrade setuptools==58.0.4
- python -m pip install --upgrade wheel
-
- # - name: Download llm binary
- # uses: ./.github/actions/llm/download-llm-binary
-
- # - name: Install BigDL-LLM
- # uses: ./.github/actions/llm/setup-llm-env
-
- - name: Install IPEX-LLM from Pypi
- shell: bash
- run: |
- pip install --pre --upgrade ipex-llm[all]
-
- - name: Download original models & convert
- uses: ./.github/actions/llm/convert-test
-
- - name: Upload ckpt to ftp
- shell: bash
- if: runner.os == 'Linux' && github.event_name == 'schedule'
- run: |
- curl -T $LLAMA_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_llama_7b_q4_0.bin
- curl -T $GPTNEOX_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_redpajama_7b_q4_0.bin
- curl -T $BLOOM_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_bloom_7b_q4_0.bin
- curl -T $STARCODER_INT4_CKPT_PATH ${LLM_FTP_URL}/llm/ggml-actions/nightly/bigdl_llm_santacoder_1b_q4_0.bin
- - name: Delete ckpt
- shell: bash
- run: |
- rm -rf $LLAMA_INT4_CKPT_PATH
- rm -rf $GPTNEOX_INT4_CKPT_PATH
- rm -rf $BLOOM_INT4_CKPT_PATH
- rm -rf $STARCODER_INT4_CKPT_PATH
 llm-unit-tests:
 # needs: llm-cpp-build
 uses: ./.github/workflows/ipex_llm_unit_tests.yml
diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml
index 05b5bcfa888..dce884e2bdb 100644
--- a/.github/workflows/ipex_llm_example_tests.yml
+++ b/.github/workflows/ipex_llm_example_tests.yml
@@ -51,7 +51,7 @@ jobs:
 - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
- ref: "ipex-llm-20240308"
+ ref: "ipex-llm-20240308-action"
 token: ${{ env.github_access_token }}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 80868f7ded9..692e7f76dad 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -115,7 +115,7 @@ jobs:
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
- ref: "ipex-llm-20240308"
+ ref: "ipex-llm-20240308-action"
 token: ${{ env.github_access_token }}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}

From b0b4df0403bf11552b0f8aa1b7c14879e9544867 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 12:37:00 +0800
Subject: [PATCH 06/40] pip install pytest

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 692e7f76dad..a9d235a358e 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -218,7 +218,7 @@ jobs:
 - name: Run LLM inference test
 shell: bash
 run: |
- python -m pip install einops datasets librosa openai-whisper
+ python -m pip install pytest einops datasets librosa openai-whisper
 bash python/llm/test/run-llm-inference-tests.sh
 - name: Run LLM langchain test
 shell: bash

From e7041d8ad6dd1aed46b44e26b7ac7bdbcbfc7c6b Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 12:58:25 +0800
Subject: [PATCH 07/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index a9d235a358e..0591acde76c 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -116,7 +116,7 @@ jobs:
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
 ref: "ipex-llm-20240308-action"
- token: ${{ env.github_access_token }}
+ token: ${GITHUB_ACCESS_TOKEN}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}
 uses: actions/setup-python@v4

From f653cd1c4ea9f78ccbb93c4b6dcab8ebd5b1a96b Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 12:59:36 +0800
Subject: [PATCH 08/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 0591acde76c..4365016c339 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -112,6 +112,7 @@ jobs:
 - name: Set access token
 run: |
 echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+ env
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"

From 92e734c2a9d6d3609ae5d1af4bd4ae4c3e51f498 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 13:05:48 +0800
Subject: [PATCH 09/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 4365016c339..93e10fc7429 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -112,7 +112,7 @@ jobs:
 - name: Set access token
 run: |
 echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- env
+ set
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
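[Editor's note] Patches 08 through 10 iterate on a one-line environment dump to discover which shell the Windows runner actually uses: `env` is a POSIX shell command, `set` works in cmd, and `Get-ChildItem Env:` is PowerShell. A sketch of doing the same inspection explicitly per OS, assuming the same matrix as the workflows above (step names are illustrative):

    - name: Dump environment (Linux)
      if: matrix.os == 'ubuntu-20.04-lts'
      shell: bash
      run: env | sort
    - name: Dump environment (Windows)
      if: matrix.os == 'windows'
      shell: powershell
      run: Get-ChildItem Env: | Sort-Object Name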
From 09209a0fac45b9d569b666c484e195cc1edd4c20 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 13:09:53 +0800
Subject: [PATCH 10/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 93e10fc7429..24fcc138e20 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -112,7 +112,7 @@ jobs:
 - name: Set access token
 run: |
 echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- set
+ Get-ChildItem Env:
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"

From f07f2516fc9c4dcda423fac23170083e9fcc0d70 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Sun, 10 Mar 2024 13:13:48 +0800
Subject: [PATCH 11/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 24fcc138e20..a9d235a358e 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -112,12 +112,11 @@ jobs:
 - name: Set access token
 run: |
 echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- Get-ChildItem Env:
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
 ref: "ipex-llm-20240308-action"
- token: ${GITHUB_ACCESS_TOKEN}
+ token: ${{ env.github_access_token }}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}
 uses: actions/setup-python@v4

From 9dd02b30c60e6d4ee93e88f5881c120204dee247 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 11:07:14 +0800
Subject: [PATCH 12/40] enable performance test

---
 .github/workflows/ipex-llm-nightly-test.yml | 2 +-
 .github/workflows/ipex_llm_example_tests.yml | 2 +-
 .github/workflows/ipex_llm_performance_tests.yml | 6 +++---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml
index e2f5b38fc9a..2418845a953 100644
--- a/.github/workflows/ipex-llm-nightly-test.yml
+++ b/.github/workflows/ipex-llm-nightly-test.yml
@@ -1,4 +1,4 @@
-name: LLM Nightly Tests
+name: NEW LLM Nightly Tests

 # Cancel previous runs in the PR when you push new commits
 concurrency:
diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml
index dce884e2bdb..88007bb7b7e 100644
--- a/.github/workflows/ipex_llm_example_tests.yml
+++ b/.github/workflows/ipex_llm_example_tests.yml
@@ -1,4 +1,4 @@
-name: LLM Example Test
+name: NEW LLM Example Test

 # Cancel previous runs in the PR when you push new commits
 concurrency:
diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index 7136f98b015..b4397591105 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -1,4 +1,4 @@
-name: LLM Performance Test
+name: NEW LLM Performance Test

 # Cancel previous runs in the PR when you push new commits
 concurrency:
@@ -13,8 +13,8 @@ on:
 schedule:
 - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
 # please uncomment it for PR tests
- # pull_request:
- # branches: [main]
+ pull_request:
+ branches: [main]
 # paths:
 # - ".github/workflows/llm_performance_tests.yml"
 # - "python/llm/test/benchmark/**"
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index a9d235a358e..286f6a755c6 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -1,4 +1,4 @@
-name: LLM Unit Tests
+name: NEW LLM Unit Tests

 # Cancel previous runs in the PR when you push new commits
 concurrency:

From abfb535696dadd150d8199463eb2226ba69d9b3e Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 11:18:05 +0800
Subject: [PATCH 13/40] only run ut

---
 .github/workflows/ipex-llm-nightly-test.yml | 6 +++---
 .github/workflows/ipex_llm_unit_tests.yml | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml
index 2418845a953..f4bc1fa9ff5 100644
--- a/.github/workflows/ipex-llm-nightly-test.yml
+++ b/.github/workflows/ipex-llm-nightly-test.yml
@@ -27,6 +27,6 @@ jobs:
 llm-unit-tests:
 # needs: llm-cpp-build
 uses: ./.github/workflows/ipex_llm_unit_tests.yml
- llm-example-test:
- # needs: llm-cpp-build
- uses: ./.github/workflows/ipex_llm_example_tests.yml
+ # llm-example-test:
+ # # needs: llm-cpp-build
+ # uses: ./.github/workflows/ipex_llm_example_tests.yml
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 286f6a755c6..1dcf64c21d3 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -116,7 +116,7 @@ jobs:
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
 ref: "ipex-llm-20240308-action"
- token: ${{ env.github_access_token }}
+ token: "xxxxxx"
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}
 uses: actions/setup-python@v4
@@ -234,7 +234,7 @@ jobs:
 pip install transformers==4.31.0
 bash python/llm/test/run-llm-llamaindex-tests.sh
 llm-unit-test-on-arc:
- needs: [setup-python-version]
+ needs: [setup-python-version, llm-unit-test]
 strategy:
 fail-fast: false
 matrix:
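[Editor's note] Patch 13 narrows the nightly workflow to the unit tests by commenting out one of the called workflows, and serializes the Arc job behind `llm-unit-test` via `needs:`. For reference, the calling side of a reusable workflow looks like the sketch below; the called file must declare a `workflow_call:` trigger, as the unit-test workflow in this series does. The `follow-up` job is purely illustrative:

    jobs:
      llm-unit-tests:
        uses: ./.github/workflows/ipex_llm_unit_tests.yml
      # A job that should only start after the unit tests succeed:
      follow-up:
        needs: [llm-unit-tests]
        runs-on: ubuntu-latest
        steps:
          - run: echo "unit tests passed"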
"analytics-zoo/bigdl-llm-internal" ref: "ipex-llm-20240308-action" - token: "xxxxxx" + token: ${{ env.github_access_token }} submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From e992cdbc34eb6324cb149481f35a1036ab408b5d Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Wed, 13 Mar 2024 11:49:07 +0800 Subject: [PATCH 15/40] fix --- .github/workflows/ipex_llm_unit_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml index 8e793562810..1fbdee09d5f 100644 --- a/.github/workflows/ipex_llm_unit_tests.yml +++ b/.github/workflows/ipex_llm_unit_tests.yml @@ -117,12 +117,12 @@ jobs: - name: Set access token for Ubuntu if: matrix.os == 'ubuntu-20.04-lts' run: | - echo "github_access_token=${{ secrets.GITHUB_ACCESS_TOKEN }}" >> "$GITHUB_ENV" + echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" - name: Set access token for Windows if: matrix.os == 'windows' run: | - echo "github_access_token=${{ secrets.GITHUB_ACCESS_TOKEN }}" | Out-File -FilePath $Env:GITHUB_ENV -Append + echo "github_access_token=${GITHUB_ACCESS_TOKEN}" | Out-File -FilePath $Env:GITHUB_ENV -Append shell: powershell - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 From ed0460e15c90636292a702817816a4a8b8b76ce8 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Wed, 13 Mar 2024 12:05:04 +0800 Subject: [PATCH 16/40] try --- .github/workflows/ipex_llm_unit_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml index 1fbdee09d5f..a3ce1923a77 100644 --- a/.github/workflows/ipex_llm_unit_tests.yml +++ b/.github/workflows/ipex_llm_unit_tests.yml @@ -122,8 +122,8 @@ jobs: - name: Set access token for Windows if: matrix.os == 'windows' run: | - echo "github_access_token=${GITHUB_ACCESS_TOKEN}" | Out-File -FilePath $Env:GITHUB_ENV -Append - shell: powershell + echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> $Env:GITHUB_ENV + shell: cmd - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: From c084e07bece197974dd00b2c70021fa84437d401 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Wed, 13 Mar 2024 14:00:18 +0800 Subject: [PATCH 17/40] test token --- .../workflows/ipex_llm_performance_tests.yml | 4 +-- .github/workflows/ipex_llm_unit_tests.yml | 34 ++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index b4397591105..a0a58444107 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -13,8 +13,8 @@ on: schedule: - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - pull_request: - branches: [main] + # pull_request: + # branches: [main] # paths: # - ".github/workflows/llm_performance_tests.yml" # - "python/llm/test/benchmark/**" diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml index a3ce1923a77..0debe412f1b 100644 --- a/.github/workflows/ipex_llm_unit_tests.yml +++ b/.github/workflows/ipex_llm_unit_tests.yml @@ -55,7 +55,9 @@ jobs: fi list=$(echo ${python_version} | jq -c) echo "python-version=${list}" >> "$GITHUB_OUTPUT" - llm-unit-test: + + +llm-unit-test: needs: [setup-python-version] 
From c084e07bece197974dd00b2c70021fa84437d401 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:00:18 +0800
Subject: [PATCH 17/40] test token

---
 .github/workflows/ipex_llm_performance_tests.yml | 4 +--
 .github/workflows/ipex_llm_unit_tests.yml | 34 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index b4397591105..a0a58444107 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -13,8 +13,8 @@ on:
 schedule:
 - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
 # please uncomment it for PR tests
- pull_request:
- branches: [main]
+ # pull_request:
+ # branches: [main]
 # paths:
 # - ".github/workflows/llm_performance_tests.yml"
 # - "python/llm/test/benchmark/**"
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index a3ce1923a77..0debe412f1b 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -55,7 +55,9 @@ jobs:
 fi
 list=$(echo ${python_version} | jq -c)
 echo "python-version=${list}" >> "$GITHUB_OUTPUT"
- llm-unit-test:
+
+
+llm-unit-test:
 needs: [setup-python-version]
 strategy:
 fail-fast: false
@@ -68,6 +70,36 @@ jobs:
 - os: ubuntu-20.04-lts
 instruction: avx512
 runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
+ env:
+ THREAD_NUM: 24
+ ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+ steps:
+ - name: Test GITHUB_ACCESS_TOKEN on Windows
+ if: matrix.os == 'windows'
+ shell: powershell
+ run: |
+ $token = "${GITHUB_ACCESS_TOKEN}"
+ $headers = @{Authorization = "token $token"}
+ $user = Invoke-RestMethod -Uri https://api.github.com/user -Method Get -Headers $headers
+ if ($user.login) {
+ Write-Output "Token is valid. User login: $($user.login)"
+ } else {
+ Write-Output "Token is invalid or not set."
+ }
+
+ llm-unit-test:
+ needs: [setup-python-version, xxx]
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [windows, ubuntu-20.04-lts]
+ python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
+ include:
+ - os: windows
+ instruction: AVX-VNNI-UT
+ - os: ubuntu-20.04-lts
+ instruction: avx512
+ runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
 env:
 THREAD_NUM: 24
 ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

From 84bfd7d9bcaabecc794149de65aaddd5738ec4f5 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:11:00 +0800
Subject: [PATCH 18/40] fix

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 0debe412f1b..73f282b0a53 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -57,7 +57,7 @@ jobs:
 echo "python-version=${list}" >> "$GITHUB_OUTPUT"


-llm-unit-test:
+ token-test:
 needs: [setup-python-version]
 strategy:
 fail-fast: false

From c16aa8c8e1c3761a9897ff9aa536d346e126dcda Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:12:30 +0800
Subject: [PATCH 19/40] fix

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 73f282b0a53..b9f44f246ec 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -88,7 +88,7 @@ jobs:
 }

 llm-unit-test:
- needs: [setup-python-version, xxx]
+ needs: [setup-python-version, token-test]
 strategy:
 fail-fast: false
 matrix:

From 0a59fec998e74f46f01ef3b13f0f680a3c23d41f Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:14:02 +0800
Subject: [PATCH 20/40] test

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index b9f44f246ec..289a0e72876 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -66,7 +66,7 @@ jobs:
 python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
 include:
 - os: windows
- instruction: AVX-VNNI-UT
+ instruction: AVX-VNNI-Build
 - os: ubuntu-20.04-lts
 instruction: avx512
 runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
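[Editor's note] The `token-test` job introduced in patch 17 assigns `$token = "${GITHUB_ACCESS_TOKEN}"`, which is bash-style expansion; in PowerShell it does not read the process environment, so the API call can report the token as invalid even when the runner has it. A corrected sketch under the same assumptions (runner-provided token, PowerShell shell):

    - name: Test GITHUB_ACCESS_TOKEN on Windows
      if: matrix.os == 'windows'
      shell: powershell
      run: |
        $token = $env:GITHUB_ACCESS_TOKEN   # PowerShell-style environment lookup
        $headers = @{ Authorization = "token $token" }
        $user = Invoke-RestMethod -Uri "https://api.github.com/user" -Method Get -Headers $headers
        Write-Output "Token is valid. User login: $($user.login)"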
From 017b858eb67798e378c87c4b7714001c569c8725 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:16:03 +0800
Subject: [PATCH 21/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 289a0e72876..81b16fe4b50 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -66,7 +66,7 @@ jobs:
 python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
 include:
 - os: windows
- instruction: AVX-VNNI-Build
+ instruction: accuracy
 - os: ubuntu-20.04-lts
 instruction: avx512
 runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]

From 0f8c7e8b655155bbe1eeb93ad9d3e7581c0df891 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 14:20:56 +0800
Subject: [PATCH 22/40] update

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 81b16fe4b50..7b8ee105d8c 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -66,7 +66,7 @@ jobs:
 python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
 include:
 - os: windows
- instruction: accuracy
+ instruction: gen13
 - os: ubuntu-20.04-lts
 instruction: avx512
 runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]

From d3bd49b533bbe8165e31afa2a3e38e80e9b2f3cf Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 16:06:34 +0800
Subject: [PATCH 23/40] update

---
 .github/workflows/ipex_llm_performance_tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index a0a58444107..de46d4efc33 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -13,8 +13,8 @@ on:
 schedule:
 - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
 # please uncomment it for PR tests
- # pull_request:
- # branches: [main]
+ pull_request:
+ branches: [main]
 # paths:
 # - ".github/workflows/llm_performance_tests.yml"
 # - "python/llm/test/benchmark/**"
@@ -230,7 +230,7 @@ jobs:
 cd python/llm/dev/benchmark/all-in-one
 export http_proxy=${HTTP_PROXY}
 export https_proxy=${HTTPS_PROXY}
- source bigdl-llm-init -t
+ source ipex-llm-init -t
 export OMP_NUM_THREADS=48
 # hide time info
 sed -i 's/str(end - st)/"xxxxxx"/g' run.py
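[Editor's note] Patch 23 tracks the bigdl-llm to ipex-llm rename: the CPU benchmark sources an init script that prepares the environment before launching `run.py`. Reconstructed as a standalone sketch, with paths and the `-t` flag taken verbatim from the workflow (what `-t` selects is not documented in this series):

    - name: CPU benchmark (sketch)
      shell: bash
      run: |
        cd python/llm/dev/benchmark/all-in-one
        source ipex-llm-init -t      # flag used as-is from the workflow
        export OMP_NUM_THREADS=48    # pin the benchmark to 48 OpenMP threads
        python run.py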
From f63019cb40c081d49bb106a6cb39a35466577425 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 16:27:38 +0800
Subject: [PATCH 24/40] test

---
 .github/workflows/ipex_llm_unit_tests.yml | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 7b8ee105d8c..63bc627a3b6 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -74,18 +74,12 @@ jobs:
 THREAD_NUM: 24
 ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
 steps:
- - name: Test GITHUB_ACCESS_TOKEN on Windows
+ - name: Test SSH_SOCKS_PROXY on Windows
 if: matrix.os == 'windows'
- shell: powershell
 run: |
- $token = "${GITHUB_ACCESS_TOKEN}"
- $headers = @{Authorization = "token $token"}
- $user = Invoke-RestMethod -Uri https://api.github.com/user -Method Get -Headers $headers
- if ($user.login) {
- Write-Output "Token is valid. User login: $($user.login)"
- } else {
- Write-Output "Token is invalid or not set."
- }
+ echo ("BRANCH_NAME=${SSH_SOCKS_PROXY}") >> $env:GITHUB_ENV
+ - run: echo "${{ env.BRANCH_NAME }}"
+ - run: echo $env:BRANCH_NAME

 llm-unit-test:
 needs: [setup-python-version, token-test]

From 9c6d6f85302de55a23b0d34d8f0e3382ffc84aef Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 16:28:50 +0800
Subject: [PATCH 25/40] test

---
 .github/workflows/ipex_llm_performance_tests.yml | 4 ++--
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index de46d4efc33..bb04523fc06 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -13,8 +13,8 @@ on:
 schedule:
 - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
 # please uncomment it for PR tests
- pull_request:
- branches: [main]
+ # pull_request:
+ # branches: [main]
 # paths:
 # - ".github/workflows/llm_performance_tests.yml"
 # - "python/llm/test/benchmark/**"
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 63bc627a3b6..488c93129ad 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -66,7 +66,7 @@ jobs:
 python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
 include:
 - os: windows
- instruction: gen13
+ instruction: AVX-VNNI-UT
 - os: ubuntu-20.04-lts
 instruction: avx512
 runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]

From 86fcf3be6b31faf362a327497e773495c3cc45df Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 16:54:09 +0800
Subject: [PATCH 26/40] test

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 488c93129ad..2152f4e0dfc 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -77,7 +77,7 @@ jobs:
 - name: Test SSH_SOCKS_PROXY on Windows
 if: matrix.os == 'windows'
 run: |
- echo ("BRANCH_NAME=${SSH_SOCKS_PROXY}") >> $env:GITHUB_ENV
+ echo "BRANCH_NAME=xxx" >> $env:GITHUB_ENV
 - run: echo "${{ env.BRANCH_NAME }}"
 - run: echo $env:BRANCH_NAME

From 777fcaaf3704a6aeb7d4dfbfe7fdb6d65afb807e Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 17:09:03 +0800
Subject: [PATCH 27/40] test

---
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 2152f4e0dfc..318976af3d9 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -77,7 +77,7 @@ jobs:
 - name: Test SSH_SOCKS_PROXY on Windows
 if: matrix.os == 'windows'
 run: |
- echo "BRANCH_NAME=xxx" >> $env:GITHUB_ENV
+ echo "BRANCH_NAME=$env:SSH_SOCKS_PROXY" >> $env:GITHUB_ENV
 - run: echo "${{ env.BRANCH_NAME }}"
 - run: echo $env:BRANCH_NAME

From 9885abedc903a4278acef92bab8fdeb24bf4b845 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 17:34:37 +0800
Subject: [PATCH 28/40] test

---
 .github/workflows/ipex_llm_unit_tests.yml | 51 +++++++++++------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 318976af3d9..66235f0f6e4 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -57,32 +57,32 @@ jobs:
 echo "python-version=${list}" >> "$GITHUB_OUTPUT"


- token-test:
- needs: [setup-python-version]
- strategy:
- fail-fast: false
- matrix:
- os: [windows, ubuntu-20.04-lts]
- python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
- include:
- - os: windows
- instruction: AVX-VNNI-UT
- - os: ubuntu-20.04-lts
- instruction: avx512
- runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
- env:
- THREAD_NUM: 24
- ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
- steps:
- - name: Test SSH_SOCKS_PROXY on Windows
- if: matrix.os == 'windows'
- run: |
- echo "BRANCH_NAME=$env:SSH_SOCKS_PROXY" >> $env:GITHUB_ENV
- - run: echo "${{ env.BRANCH_NAME }}"
- - run: echo $env:BRANCH_NAME
+ # token-test:
+ # needs: [setup-python-version]
+ # strategy:
+ # fail-fast: false
+ # matrix:
+ # os: [windows, ubuntu-20.04-lts]
+ # python-version: ${{ fromJson(needs.setup-python-version.outputs.python-version) }}
+ # include:
+ # - os: windows
+ # instruction: AVX-VNNI-UT
+ # - os: ubuntu-20.04-lts
+ # instruction: avx512
+ # runs-on: [self-hosted, llm, "${{matrix.instruction}}", "${{matrix.os}}"]
+ # env:
+ # THREAD_NUM: 24
+ # ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+ # steps:
+ # - name: Test SSH_SOCKS_PROXY on Windows
+ # if: matrix.os == 'windows'
+ # run: |
+ # echo "BRANCH_NAME=$env:SSH_SOCKS_PROXY" >> $env:GITHUB_ENV
+ # - run: echo "${{ env.BRANCH_NAME }}"
+ # - run: echo $env:BRANCH_NAME

 llm-unit-test:
- needs: [setup-python-version, token-test]
+ needs: [setup-python-version]
 strategy:
 fail-fast: false
 matrix:
@@ -148,8 +148,7 @@ jobs:
 - name: Set access token for Windows
 if: matrix.os == 'windows'
 run: |
- echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> $Env:GITHUB_ENV
- shell: cmd
+ echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:

From 26df2a74f3ec02d5bc38defc57b4d2444fb8f0c0 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 17:37:33 +0800
Subject: [PATCH 29/40] update

---
 .github/workflows/ipex_llm_example_tests.yml | 2 +-
 .github/workflows/ipex_llm_unit_tests.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml
index 88007bb7b7e..e2d16f4264d 100644
--- a/.github/workflows/ipex_llm_example_tests.yml
+++ b/.github/workflows/ipex_llm_example_tests.yml
@@ -51,7 +51,7 @@ jobs:
 - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
- ref: "ipex-llm-20240308-action"
+ ref: "ipex-llm-20240308"
 token: ${{ env.github_access_token }}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}
diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml
index 66235f0f6e4..0d4ee9c97fe 100644
--- a/.github/workflows/ipex_llm_unit_tests.yml
+++ b/.github/workflows/ipex_llm_unit_tests.yml
@@ -153,7 +153,7 @@ jobs:
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
- ref: "ipex-llm-20240308-action"
+ ref: "ipex-llm-20240308"
 token: ${{ env.github_access_token }}
 submodules: "recursive"
 - name: Set up Python ${{ matrix.python-version }}

From bd748b27f606e23fff854609bf24eb3e83769ec0 Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Wed, 13 Mar 2024 17:39:26 +0800
Subject: [PATCH 30/40] update

---
 .github/workflows/ipex_llm_performance_tests.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index bb04523fc06..f1be6f4fc95 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -13,8 +13,8 @@ on:
 schedule:
 - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China
 # please uncomment it for PR tests
- # pull_request:
- # branches: [main]
+ pull_request:
+ branches: [main]
 # paths:
 # - ".github/workflows/llm_performance_tests.yml"
 # - "python/llm/test/benchmark/**"
@@ -264,7 +264,7 @@ jobs:
 steps:
 - name: Set access token
 run: |
- echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+ echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
@@ -339,7 +339,7 @@ jobs:
 steps:
 - name: Set access token
 run: |
- echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+ echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV
 - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 with:
 repository: "analytics-zoo/bigdl-llm-internal"
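[Editor's note] Several of the patches above (12, 17, 23, 25, 30) toggle the `pull_request:` trigger on and off by editing the workflow file. An alternative that avoids repeated trigger edits is to keep both triggers enabled and gate the expensive jobs on the event name; a minimal sketch, not from this series:

    on:
      schedule:
        - cron: "30 16 * * *"
      pull_request:
        branches: [main]

    jobs:
      perf-test:
        # Run the full benchmark only for the nightly schedule
        if: github.event_name == 'schedule'
        runs-on: ubuntu-latest
        steps:
          - run: echo "nightly perf run"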
"CHATGLM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/chatglm2-6b-q4_0.bin" >> "$GITHUB_ENV" - # - name: Set access token - # run: | - # echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" - - name: Set access token for Ubuntu if: matrix.os == 'ubuntu-20.04-lts' @@ -272,7 +268,7 @@ jobs: pip install transformers==4.31.0 bash python/llm/test/run-llm-llamaindex-tests.sh llm-unit-test-on-arc: - needs: [setup-python-version, llm-unit-test] + needs: [setup-python-version] strategy: fail-fast: false matrix: From ec372650f72626db31e5e89f2de93f712a8c8ef4 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Wed, 13 Mar 2024 23:11:48 +0800 Subject: [PATCH 32/40] test --- .github/workflows/ipex-llm-nightly-test.yml | 4 +- .../workflows/ipex_llm_performance_tests.yml | 1330 ++++++++--------- 2 files changed, 667 insertions(+), 667 deletions(-) diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml index 2418845a953..82de7ac2c9a 100644 --- a/.github/workflows/ipex-llm-nightly-test.yml +++ b/.github/workflows/ipex-llm-nightly-test.yml @@ -12,8 +12,8 @@ permissions: on: schedule: - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China - pull_request: - branches: [main] + # pull_request: + # branches: [main] # paths: # - ".github/workflows/llm-nightly-test.yml" # - ".github/actions/llm/setup-llm-env/action.yml" diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index 2552e0e39eb..8d2213c8683 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -95,22 +95,22 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - - name: Test on xpu(transformers==4.31.0) - shell: bash - run: | - date_for_test_version=$(date -d yesterday +%Y-%m-%d) - sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - source /opt/intel/oneapi/setvars.sh - export USE_XETLA=OFF - export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - # hide time info - sed -i 's/str(end - st)/"xxxxxx"/g' run.py - # change csv name - sed -i 's/{today}/{today}_test1/g' run.py - python run.py + # - name: Test on xpu(transformers==4.31.0) + # shell: bash + # run: | + # date_for_test_version=$(date -d yesterday +%Y-%m-%d) + # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + # source /opt/intel/oneapi/setvars.sh + # export USE_XETLA=OFF + # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # # hide time info + # sed -i 's/str(end - st)/"xxxxxx"/g' run.py + # # change csv name + # sed -i 's/{today}/{today}_test1/g' run.py + # python run.py - name: Test on xpu(transformers==4.34.0) shell: bash @@ -165,653 +165,653 @@ jobs: curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ fi - llm-performance-test-on-spr: - # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests - strategy: - fail-fast: false - matrix: - python-version: ["3.9"] - runs-on: [self-hosted, llm, spr01-perf] - env: - OMP_NUM_THREADS: 
16 - THREAD_NUM: 16 - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - steps: - - name: Set access token - run: | - echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - with: - repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" - token: ${{ env.github_access_token }} - submodules: "recursive" - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - shell: bash - run: | - python -m pip install --upgrade pip - python -m pip install --upgrade wheel - python -m pip install --upgrade omegaconf - python -m pip install --upgrade pandas - python -m pip install --upgrade einops - python -m pip install --upgrade tiktoken - python -m pip install --upgrade transformers_stream_generator - - # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - - - name: Install BigDL-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! pip show bigdl-llm | grep $test_version_date; then - # echo "Did not install bigdl-llm with excepted version $test_version_date" - # exit 1 - # fi - - - name: Test on cpu - shell: bash - run: | - date_for_test_version=$(date -d yesterday +%Y-%m-%d) - sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - export http_proxy=${HTTP_PROXY} - export https_proxy=${HTTPS_PROXY} - source ipex-llm-init -t - export OMP_NUM_THREADS=48 - # hide time info - sed -i 's/str(end - st)/"xxxxxx"/g' run.py - python run.py - cp ./*.csv /models/nightly_perf_cpu - cd ../../../test/benchmark - python -m pip install pandas==1.5.3 - python csv_to_html.py -f /models/nightly_perf_cpu - cd /models/nightly_perf_cpu - for f in *.html; do - curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/ - done - - llm-performance-test-on-core: - # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests - strategy: - fail-fast: false - matrix: - include: - - os: windows - platform: dp - python-version: "3.9" - # - os: windows - # platform: lp - # python-version: "3.9" - runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"] - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/ - steps: - - name: Set access token - run: | - echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - with: - repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" - token: ${{ env.github_access_token }} - submodules: "recursive" - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 
- with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - shell: bash - run: | - python -m pip install --upgrade pip - python -m pip install --upgrade wheel - python -m pip install --upgrade omegaconf pandas - python -m pip install --upgrade tiktoken einops transformers_stream_generator + # llm-performance-test-on-spr: + # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # # needs: llm-cpp-build # please uncomment it for PR tests + # strategy: + # fail-fast: false + # matrix: + # python-version: ["3.9"] + # runs-on: [self-hosted, llm, spr01-perf] + # env: + # OMP_NUM_THREADS: 16 + # THREAD_NUM: 16 + # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + # steps: + # - name: Set access token + # run: | + # echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" + # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + # with: + # repository: "analytics-zoo/bigdl-llm-internal" + # ref: "ipex-llm-20240308" + # token: ${{ env.github_access_token }} + # submodules: "recursive" + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + + # - name: Install dependencies + # shell: bash + # run: | + # python -m pip install --upgrade pip + # python -m pip install --upgrade wheel + # python -m pip install --upgrade omegaconf + # python -m pip install --upgrade pandas + # python -m pip install --upgrade einops + # python -m pip install --upgrade tiktoken + # python -m pip install --upgrade transformers_stream_generator + + # # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests + # # - name: Download llm binary + # # uses: ./.github/actions/llm/download-llm-binary + + # # - name: Run LLM install (all) test + # # uses: ./.github/actions/llm/setup-llm-env + + # - name: Install BigDL-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu + # # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # # if ! 
pip show bigdl-llm | grep $test_version_date; then + # # echo "Did not install bigdl-llm with excepted version $test_version_date" + # # exit 1 + # # fi + + # - name: Test on cpu + # shell: bash + # run: | + # date_for_test_version=$(date -d yesterday +%Y-%m-%d) + # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + # mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # export http_proxy=${HTTP_PROXY} + # export https_proxy=${HTTPS_PROXY} + # source ipex-llm-init -t + # export OMP_NUM_THREADS=48 + # # hide time info + # sed -i 's/str(end - st)/"xxxxxx"/g' run.py + # python run.py + # cp ./*.csv /models/nightly_perf_cpu + # cd ../../../test/benchmark + # python -m pip install pandas==1.5.3 + # python csv_to_html.py -f /models/nightly_perf_cpu + # cd /models/nightly_perf_cpu + # for f in *.html; do + # curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/ + # done + + # llm-performance-test-on-core: + # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # # needs: llm-cpp-build # please uncomment it for PR tests + # strategy: + # fail-fast: false + # matrix: + # include: + # - os: windows + # platform: dp + # python-version: "3.9" + # # - os: windows + # # platform: lp + # # python-version: "3.9" + # runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"] + # env: + # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + # CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/ + # steps: + # - name: Set access token + # run: | + # echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV + # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + # with: + # repository: "analytics-zoo/bigdl-llm-internal" + # ref: "ipex-llm-20240308" + # token: ${{ env.github_access_token }} + # submodules: "recursive" + + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + + # - name: Install dependencies + # shell: bash + # run: | + # python -m pip install --upgrade pip + # python -m pip install --upgrade wheel + # python -m pip install --upgrade omegaconf pandas + # python -m pip install --upgrade tiktoken einops transformers_stream_generator - # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - - - name: Install BigDL-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! 
pip show bigdl-llm | grep $test_version_date; then - # echo "Did not install bigdl-llm with excepted version $test_version_date" - # exit 1 - # fi - - - name: Test on core ${{ matrix.platform }} - shell: bash - run: | - date_for_test_version=$(date -d yesterday +%Y-%m-%d) - sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - export http_proxy=${HTTP_PROXY} - export https_proxy=${HTTPS_PROXY} - # hide time info - sed -i 's/str(end - st)/"xxxxxx"/g' run.py - python run.py - cp ./*.csv $CSV_SAVE_PATH - cd ../../../test/benchmark - python -m pip install pandas==1.5.3 - python csv_to_html.py -f $CSV_SAVE_PATH - cd ../../dev/benchmark/all-in-one/ - if [ ${{ github.event.schedule}} ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ - fi - - llm-performance-test-on-igpu: - # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests - strategy: - fail-fast: false - matrix: - include: - - os: windows - python-version: "3.9" - runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - steps: - - name: Set access token - run: | - echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - with: - repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" - token: ${{ env.github_access_token }} - submodules: "recursive" - - # TODO: Put the bigdl-llm related install process for win gpu into a action function - - # Please uncomment it and commment the install from pypi for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Prepare for install bigdl-llm from source - # shell: bash - # run: | - # sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py - # sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - - # - name: Install bigdl-llm and other related packages (install from source) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf - - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator - - # cd python\llm - # python setup.py clean --all bdist_wheel --win - # if not exist dist\bigdl_llm*.whl (exit /b 1) - # for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i - - # pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu - # if %ERRORLEVEL% neq 0 (exit /b 1) - # pip list - - # call conda deactivate - - - name: Determine desired bigdl-llm version - shell: bash - run: | - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - - - name: Install bigdl-llm and other related packages (install from pypi) - shell: cmd - run: | - call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - call conda activate igpu-perf - - pip install --upgrade pip - pip install --upgrade wheel - pip install --upgrade omegaconf 
pandas - pip install --upgrade tiktoken einops transformers_stream_generator - - pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu - pip list - - call conda deactivate - - - name: Create env for html generation - shell: cmd - run: | - call conda create -n html-gen python=3.9 -y - call conda activate html-gen - - pip install pandas==1.5.3 - pip install Jinja2 - - call conda deactivate - - - name: Set directory envs & and fix generated csv date name - shell: bash - run: | - if [ ${{ github.event_name }} == 'schedule' ]; then - echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV" - else - echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV" - fi - date_for_test_version=$(date -d yesterday +%Y-%m-%d) - echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV" - - sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - - name: Prepare igpu perf test (32-32) - shell: bash - run: | - # hide time info - # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml - - - name: Test on igpu (32-32) - shell: cmd - run: | - call conda activate igpu-perf - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Mistral (32-32) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml - - - name: Test on igpu for Mistral (32-32) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Qwen1.5 (32-32) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml - - - name: Test on igpu for Qwen1.5 (32-32) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Concat csv and generate html (32-32) - shell: cmd - run: | - call conda activate html-gen - - cd 
python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\32-32\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% - - call conda deactivate - - # TODO: create a action function here for different input - # 1024-128 - - name: Prepare igpu perf test (1024-128) - shell: bash - run: | - sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml - - - name: Test on igpu (1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.31.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Mistral (1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - - - name: Test on igpu for Mistral (1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Qwen 1.5 (1024-128) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - - - name: Test on igpu for Qwen 1.5 (1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Concat csv and generate html (1024-128) - shell: cmd - run: | - call conda activate html-gen - - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\1024-128\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% - - call conda deactivate - - # 2048-256 - - name: Prepare igpu perf test (2048-256) - shell: bash - run: | - sed -i 's/1024-128/2048-256/g' 
python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - - - name: Test on igpu (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.31.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Mistral (2048-256) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - - - name: Test on igpu for Mistral (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Qwen 1.5 (2048-256) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - - - name: Test on igpu for Qwen 1.5 (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Concat csv and generate html (2048-256) - shell: cmd - run: | - call conda activate html-gen - - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\2048-256\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% - - call conda deactivate - - # load_low_bit 1024-128 - - name: Prepare igpu perf test (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - - - name: Test on igpu (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.31.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for 
llava - set TRANSFORMERS_OFFLINE=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - - - name: Test on igpu for Mistral (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml - - - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 - - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Concat csv and generate html (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate html-gen - - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% - - call conda deactivate - - - name: Upload results to ftp - if: ${{ always() }} - shell: cmd - run: | - cd %CSV_SAVE_PATH% - IF "${{ github.event_name }}"=="schedule" ( - for %%f in (*.html) do ( - curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% - ) - ) - - # for test on machine when encountering error - # - name: Remove conda env - # if: ${{ always() }} - # shell: cmd - # run: | - # call conda env remove -n igpu-perf -y + # # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests + # # - name: Download llm binary + # # uses: ./.github/actions/llm/download-llm-binary + + # # - name: Run LLM install (all) test + # # uses: ./.github/actions/llm/setup-llm-env + + # - name: Install BigDL-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu + # # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # # if ! 
pip show bigdl-llm | grep $test_version_date; then + # # echo "Did not install bigdl-llm with excepted version $test_version_date" + # # exit 1 + # # fi + + # - name: Test on core ${{ matrix.platform }} + # shell: bash + # run: | + # date_for_test_version=$(date -d yesterday +%Y-%m-%d) + # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + # mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # export http_proxy=${HTTP_PROXY} + # export https_proxy=${HTTPS_PROXY} + # # hide time info + # sed -i 's/str(end - st)/"xxxxxx"/g' run.py + # python run.py + # cp ./*.csv $CSV_SAVE_PATH + # cd ../../../test/benchmark + # python -m pip install pandas==1.5.3 + # python csv_to_html.py -f $CSV_SAVE_PATH + # cd ../../dev/benchmark/all-in-one/ + # if [ ${{ github.event.schedule}} ]; then + # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ + # fi + + # llm-performance-test-on-igpu: + # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # # needs: llm-cpp-build # please uncomment it for PR tests + # strategy: + # fail-fast: false + # matrix: + # include: + # - os: windows + # python-version: "3.9" + # runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] + # env: + # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + # steps: + # - name: Set access token + # run: | + # echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV + # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + # with: + # repository: "analytics-zoo/bigdl-llm-internal" + # ref: "ipex-llm-20240308" + # token: ${{ env.github_access_token }} + # submodules: "recursive" + + # # TODO: Put the bigdl-llm related install process for win gpu into a action function + + # # Please uncomment it and commment the install from pypi for PR tests + # # - name: Download llm binary + # # uses: ./.github/actions/llm/download-llm-binary + + # # - name: Prepare for install bigdl-llm from source + # # shell: bash + # # run: | + # # sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py + # # sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + + # # - name: Install bigdl-llm and other related packages (install from source) + # # shell: cmd + # # run: | + # # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # # call conda activate igpu-perf + + # # pip install --upgrade pip + # # pip install --upgrade wheel + # # pip install --upgrade omegaconf pandas + # # pip install --upgrade tiktoken einops transformers_stream_generator + + # # cd python\llm + # # python setup.py clean --all bdist_wheel --win + # # if not exist dist\bigdl_llm*.whl (exit /b 1) + # # for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i + + # # pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu + # # if %ERRORLEVEL% neq 0 (exit /b 1) + # # pip list + + # # call conda deactivate + + # - name: Determine desired bigdl-llm version + # shell: bash + # run: | + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + + # - name: Install bigdl-llm and other related packages (install from pypi) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ 
matrix.python-version }} libuv -y + # call conda activate igpu-perf + + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator + + # pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu + # pip list + + # call conda deactivate + + # - name: Create env for html generation + # shell: cmd + # run: | + # call conda create -n html-gen python=3.9 -y + # call conda activate html-gen + + # pip install pandas==1.5.3 + # pip install Jinja2 + + # call conda deactivate + + # - name: Set directory envs & and fix generated csv date name + # shell: bash + # run: | + # if [ ${{ github.event_name }} == 'schedule' ]; then + # echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV" + # else + # echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV" + # fi + # date_for_test_version=$(date -d yesterday +%Y-%m-%d) + # echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV" + + # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + # - name: Prepare igpu perf test (32-32) + # shell: bash + # run: | + # # hide time info + # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml + + # - name: Test on igpu (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Mistral (32-32) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml + + # - name: Test on igpu for Mistral (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Qwen1.5 (32-32) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml + + # - name: Test on igpu for Qwen1.5 (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move 
..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Concat csv and generate html (32-32) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\32-32\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # TODO: create a action function here for different input + # # 1024-128 + # - name: Prepare igpu perf test (1024-128) + # shell: bash + # run: | + # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml + + # - name: Test on igpu (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Mistral (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml + + # - name: Test on igpu for Mistral (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml + + # - name: Test on igpu for Qwen 1.5 (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Concat csv and generate html (1024-128) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd 
python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # 2048-256 + # - name: Prepare igpu perf test (2048-256) + # shell: bash + # run: | + # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml + + # - name: Test on igpu (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Mistral (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + + # - name: Test on igpu for Mistral (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml + + # - name: Test on igpu for Qwen 1.5 (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Concat csv and generate html (2048-256) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\2048-256\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + + # call conda 
deactivate + + # # load_low_bit 1024-128 + # - name: Prepare igpu perf test (load_low_bit 1024-128) + # shell: bash + # run: | + # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml + + # - name: Test on igpu (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml + + # - name: Test on igpu for Mistral (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml + + # - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Concat csv and generate html (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # - name: Upload results to ftp + # if: ${{ always() }} + # shell: cmd + # run: | + # cd %CSV_SAVE_PATH% + # IF "${{ github.event_name 
}}"=="schedule" ( + # for %%f in (*.html) do ( + # curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + # ) + # ) + + # # for test on machine when encountering error + # # - name: Remove conda env + # # if: ${{ always() }} + # # shell: cmd + # # run: | + # # call conda env remove -n igpu-perf -y From 8385ae1f04ec2495132650c16bccea884583a3c3 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 11:39:51 +0800 Subject: [PATCH 33/40] test arc transformers 4.31.0 --- .../workflows/ipex_llm_performance_tests.yml | 90 +++++++++---------- .github/workflows/ipex_llm_unit_tests.yml | 22 ++--- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index 8d2213c8683..7577837ab95 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -95,51 +95,51 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - # - name: Test on xpu(transformers==4.31.0) - # shell: bash - # run: | - # date_for_test_version=$(date -d yesterday +%Y-%m-%d) - # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - # source /opt/intel/oneapi/setvars.sh - # export USE_XETLA=OFF - # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # # hide time info - # sed -i 's/str(end - st)/"xxxxxx"/g' run.py - # # change csv name - # sed -i 's/{today}/{today}_test1/g' run.py - # python run.py - - - name: Test on xpu(transformers==4.34.0) + - name: Test on xpu(transformers==4.31.0) shell: bash run: | - source /opt/intel/oneapi/setvars.sh - export USE_XETLA=OFF - export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # upgrade transformers for model Mistral-7B-v0.1 - python -m pip install transformers==4.34.0 - cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - # change csv name - sed -i 's/test1/test2/g' run.py - python run.py + date_for_test_version=$(date -d yesterday +%Y-%m-%d) + sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - name: Test on xpu(transformers==4.37.0) - shell: bash - run: | source /opt/intel/oneapi/setvars.sh export USE_XETLA=OFF export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # upgrade transformers for model Qwen/Qwen1.5-7B-Chat - python -m pip install transformers==4.37.0 - cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml cd python/llm/dev/benchmark/all-in-one + # hide time info + sed -i 's/str(end - st)/"xxxxxx"/g' run.py # change csv name - sed -i 's/test2/test3/g' run.py + sed -i 's/{today}/{today}_test1/g' run.py python run.py + # - name: Test on xpu(transformers==4.34.0) + # shell: bash + # run: | + # source /opt/intel/oneapi/setvars.sh + # export USE_XETLA=OFF + # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # # upgrade transformers for model Mistral-7B-v0.1 + # python -m pip install transformers==4.34.0 + # cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # # change csv name + # sed -i 
's/test1/test2/g' run.py + # python run.py + + # - name: Test on xpu(transformers==4.37.0) + # shell: bash + # run: | + # source /opt/intel/oneapi/setvars.sh + # export USE_XETLA=OFF + # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # # upgrade transformers for model Qwen/Qwen1.5-7B-Chat + # python -m pip install transformers==4.37.0 + # cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # # change csv name + # sed -i 's/test2/test3/g' run.py + # python run.py + - name: Concat csv and generate html shell: bash run: | @@ -154,16 +154,16 @@ jobs: cd ../../../test/benchmark python csv_to_html.py -f $CSV_SAVE_PATH - - name: Check and upload results to ftp - shell: bash - run: | - cd python/llm/dev/benchmark/all-in-one - python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml - python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml - find . -name "*test*.csv" -delete - if [ ${{ github.event.schedule}} ]; then - curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ - fi + # - name: Check and upload results to ftp + # shell: bash + # run: | + # cd python/llm/dev/benchmark/all-in-one + # python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml + # python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml + # find . -name "*test*.csv" -delete + # if [ ${{ github.event.schedule}} ]; then + # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ + # fi # llm-performance-test-on-spr: # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml index f26f9204463..11a645eea13 100644 --- a/.github/workflows/ipex_llm_unit_tests.yml +++ b/.github/workflows/ipex_llm_unit_tests.yml @@ -22,17 +22,17 @@ on: - ".github/actions/llm/cli-test-linux/action.yml" - ".github/actions/llm/cli-test-windows/action.yml" - ".github/actions/llm/download-llm-binary/action.yml" - pull_request: - branches: [main] - paths: - - "python/llm/**" - - ".github/workflows/llm_unit_tests.yml" - - ".github/workflows/llm-binary-build.yml" - - ".github/actions/llm/setup-llm-env/action.yml" - - ".github/actions/llm/remove-llm-env/action.yml" - - ".github/actions/llm/cli-test-linux/action.yml" - - ".github/actions/llm/cli-test-windows/action.yml" - - ".github/actions/llm/download-llm-binary/action.yml" + # pull_request: + # branches: [main] + # paths: + # - "python/llm/**" + # - ".github/workflows/llm_unit_tests.yml" + # - ".github/workflows/llm-binary-build.yml" + # - ".github/actions/llm/setup-llm-env/action.yml" + # - ".github/actions/llm/remove-llm-env/action.yml" + # - ".github/actions/llm/cli-test-linux/action.yml" + # - ".github/actions/llm/cli-test-windows/action.yml" + # - ".github/actions/llm/download-llm-binary/action.yml" workflow_dispatch: workflow_call: From 34af5c419f245fe6e5edd5cd509a7e9bae6ab5c8 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 11:40:19 +0800 Subject: [PATCH 34/40] update --- .github/workflows/ipex_llm_performance_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml 
b/.github/workflows/ipex_llm_performance_tests.yml index 7577837ab95..d7018fa4362 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -48,7 +48,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" + ref: "ipex-llm-20240308-test" token: ${{ env.github_access_token }} submodules: "recursive" From f3d5047704dec1c22797e851979975f40c82f7ea Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 12:09:08 +0800 Subject: [PATCH 35/40] update --- .../workflows/ipex_llm_performance_tests.yml | 832 +++++++++--------- 1 file changed, 416 insertions(+), 416 deletions(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index d7018fa4362..522f4dd82e1 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -324,494 +324,494 @@ jobs: # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ # fi - # llm-performance-test-on-igpu: - # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # # needs: llm-cpp-build # please uncomment it for PR tests - # strategy: - # fail-fast: false - # matrix: - # include: - # - os: windows - # python-version: "3.9" - # runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] - # env: - # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - # steps: - # - name: Set access token - # run: | - # echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV - # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - # with: - # repository: "analytics-zoo/bigdl-llm-internal" - # ref: "ipex-llm-20240308" - # token: ${{ env.github_access_token }} - # submodules: "recursive" + llm-performance-test-on-igpu: + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests + strategy: + fail-fast: false + matrix: + include: + - os: windows + python-version: "3.9" + runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] + env: + ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + steps: + - name: Set access token + run: | + echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + with: + repository: "analytics-zoo/bigdl-llm-internal" + ref: "ipex-llm-20240308" + token: ${{ env.github_access_token }} + submodules: "recursive" - # # TODO: Put the bigdl-llm related install process for win gpu into a action function + # TODO: Put the bigdl-llm related install process for win gpu into a action function - # # Please uncomment it and commment the install from pypi for PR tests - # # - name: Download llm binary - # # uses: ./.github/actions/llm/download-llm-binary + # Please uncomment it and commment the install from pypi for PR tests + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary - # # - name: Prepare for install bigdl-llm from source - # # shell: bash - # # run: | - # # sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py - # # sed -i 
's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + # - name: Prepare for install bigdl-llm from source + # shell: bash + # run: | + # sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py + # sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - # # - name: Install bigdl-llm and other related packages (install from source) - # # shell: cmd - # # run: | - # # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # # call conda activate igpu-perf + # - name: Install bigdl-llm and other related packages (install from source) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # call conda activate igpu-perf - # # pip install --upgrade pip - # # pip install --upgrade wheel - # # pip install --upgrade omegaconf pandas - # # pip install --upgrade tiktoken einops transformers_stream_generator + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator - # # cd python\llm - # # python setup.py clean --all bdist_wheel --win - # # if not exist dist\bigdl_llm*.whl (exit /b 1) - # # for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i + # cd python\llm + # python setup.py clean --all bdist_wheel --win + # if not exist dist\bigdl_llm*.whl (exit /b 1) + # for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i - # # pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu - # # if %ERRORLEVEL% neq 0 (exit /b 1) - # # pip list + # pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu + # if %ERRORLEVEL% neq 0 (exit /b 1) + # pip list - # # call conda deactivate + # call conda deactivate - # - name: Determine desired bigdl-llm version - # shell: bash - # run: | - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + - name: Determine desired bigdl-llm version + shell: bash + run: | + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - # - name: Install bigdl-llm and other related packages (install from pypi) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf + - name: Install bigdl-llm and other related packages (install from pypi) + shell: cmd + run: | + call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + call conda activate igpu-perf - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator + pip install --upgrade pip + pip install --upgrade wheel + pip install --upgrade omegaconf pandas + pip install --upgrade tiktoken einops transformers_stream_generator - # pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu - # pip list + pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu + pip list - # call conda deactivate + call conda deactivate - # - name: Create env for html generation - # shell: cmd - # run: | - # call conda create -n html-gen python=3.9 -y - # call conda activate html-gen + - name: Create env for html generation + shell: cmd + run: | + call conda create -n html-gen python=3.9 -y + call conda 
activate html-gen - # pip install pandas==1.5.3 - # pip install Jinja2 + pip install pandas==1.5.3 + pip install Jinja2 - # call conda deactivate + call conda deactivate - # - name: Set directory envs & and fix generated csv date name - # shell: bash - # run: | - # if [ ${{ github.event_name }} == 'schedule' ]; then - # echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV" - # else - # echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV" - # fi - # date_for_test_version=$(date -d yesterday +%Y-%m-%d) - # echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV" + - name: Set directory envs & and fix generated csv date name + shell: bash + run: | + if [ ${{ github.event_name }} == 'schedule' ]; then + echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV" + else + echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV" + fi + date_for_test_version=$(date -d yesterday +%Y-%m-%d) + echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV" - # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - # - name: Prepare igpu perf test (32-32) - # shell: bash - # run: | - # # hide time info - # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml + # - name: Prepare igpu perf test (32-32) + # shell: bash + # run: | + # # hide time info + # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml - # - name: Test on igpu (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - # - name: Prepare igpu perf test for Mistral (32-32) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml + # - name: Test on igpu (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for Mistral (32-32) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local 
model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml - # - name: Test on igpu for Mistral (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Qwen1.5 (32-32) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml + # - name: Prepare igpu perf test for Qwen1.5 (32-32) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml - # - name: Test on igpu for Qwen1.5 (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + # - name: Test on igpu for Qwen1.5 (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Concat csv and generate html (32-32) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\32-32\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # TODO: create a action function here for different input - # # 1024-128 - # - name: Prepare igpu perf test (1024-128) - # shell: bash - # run: | - # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 
's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml + # - name: Concat csv and generate html (32-32) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\32-32\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # TODO: create a action function here for different input + # # 1024-128 + # - name: Prepare igpu perf test (1024-128) + # shell: bash + # run: | + # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml - # - name: Test on igpu (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + # - name: Test on igpu (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Mistral (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml + # - name: Prepare igpu perf test for Mistral (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - # - name: Test on igpu for Mistral (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py 
>> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml + # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - # - name: Test on igpu for Qwen 1.5 (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + # - name: Test on igpu for Qwen 1.5 (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Concat csv and generate html (1024-128) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # 2048-256 - # - name: Prepare igpu perf test (2048-256) - # shell: bash - # run: | - # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml + # - name: Concat csv and generate html (1024-128) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # 2048-256 + # - name: Prepare igpu perf test (2048-256) + # shell: bash + # run: | + # 
sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - # - name: Test on igpu (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + # - name: Test on igpu (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Mistral (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + - name: Prepare igpu perf test for Mistral (2048-256) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - # - name: Test on igpu for Mistral (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + - name: Test on igpu for Mistral (2048-256) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml + - name: Prepare igpu perf test for Qwen 1.5 (2048-256) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model 
hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - # - name: Test on igpu for Qwen 1.5 (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + - name: Test on igpu for Qwen 1.5 (2048-256) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Concat csv and generate html (2048-256) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\2048-256\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # load_low_bit 1024-128 - # - name: Prepare igpu perf test (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml + - name: Concat csv and generate html (2048-256) + shell: cmd + run: | + call conda activate html-gen + + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\2048-256\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + + call conda deactivate + + # load_low_bit 1024-128 + - name: Prepare igpu perf test (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - # - name: Test on igpu (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + - name: Test on igpu (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set 
TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml + - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - # - name: Test on igpu for Mistral (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + - name: Test on igpu for Mistral (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml + - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml - # - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd 
python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Concat csv and generate html (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # - name: Upload results to ftp - # if: ${{ always() }} - # shell: cmd - # run: | - # cd %CSV_SAVE_PATH% - # IF "${{ github.event_name }}"=="schedule" ( - # for %%f in (*.html) do ( - # curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% - # ) - # ) - - # # for test on machine when encountering error - # # - name: Remove conda env - # # if: ${{ always() }} - # # shell: cmd - # # run: | - # # call conda env remove -n igpu-perf -y + - name: Concat csv and generate html (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate html-gen + + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% + + call conda deactivate + + - name: Upload results to ftp + if: ${{ always() }} + shell: cmd + run: | + cd %CSV_SAVE_PATH% + IF "${{ github.event_name }}"=="schedule" ( + for %%f in (*.html) do ( + curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + ) + ) + + # for test on machine when encountering error + # - name: Remove conda env + # if: ${{ always() }} + # shell: cmd + # run: | + # call conda env remove -n igpu-perf -y From a635fe92b3b307de19b6a72b340ce7149ccd281b Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 14:13:26 +0800 Subject: [PATCH 36/40] retrigger --- .../workflows/ipex_llm_performance_tests.yml | 886 +++++++++--------- 1 file changed, 443 insertions(+), 443 deletions(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index 522f4dd82e1..1b6a487da50 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -48,7 +48,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308-test" + ref: "ipex-llm-20240308" token: ${{ env.github_access_token }} submodules: "recursive" @@ -112,33 +112,33 @@ jobs: sed -i 's/{today}/{today}_test1/g' run.py python run.py - # - name: Test on xpu(transformers==4.34.0) - # shell: bash - # run: | - # source /opt/intel/oneapi/setvars.sh - # export USE_XETLA=OFF - 
# export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # # upgrade transformers for model Mistral-7B-v0.1 - # python -m pip install transformers==4.34.0 - # cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # # change csv name - # sed -i 's/test1/test2/g' run.py - # python run.py + - name: Test on xpu(transformers==4.34.0) + shell: bash + run: | + source /opt/intel/oneapi/setvars.sh + export USE_XETLA=OFF + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # upgrade transformers for model Mistral-7B-v0.1 + python -m pip install transformers==4.34.0 + cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/test1/test2/g' run.py + python run.py - # - name: Test on xpu(transformers==4.37.0) - # shell: bash - # run: | - # source /opt/intel/oneapi/setvars.sh - # export USE_XETLA=OFF - # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # # upgrade transformers for model Qwen/Qwen1.5-7B-Chat - # python -m pip install transformers==4.37.0 - # cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # # change csv name - # sed -i 's/test2/test3/g' run.py - # python run.py + - name: Test on xpu(transformers==4.37.0) + shell: bash + run: | + source /opt/intel/oneapi/setvars.sh + export USE_XETLA=OFF + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # upgrade transformers for model Qwen/Qwen1.5-7B-Chat + python -m pip install transformers==4.37.0 + cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/test2/test3/g' run.py + python run.py - name: Concat csv and generate html shell: bash @@ -324,494 +324,494 @@ jobs: # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ # fi - llm-performance-test-on-igpu: - # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests - strategy: - fail-fast: false - matrix: - include: - - os: windows - python-version: "3.9" - runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - steps: - - name: Set access token - run: | - echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - with: - repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" - token: ${{ env.github_access_token }} - submodules: "recursive" + # llm-performance-test-on-igpu: + # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # # needs: llm-cpp-build # please uncomment it for PR tests + # strategy: + # fail-fast: false + # matrix: + # include: + # - os: windows + # python-version: "3.9" + # runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] + # env: + # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + # steps: + # - name: Set access token + # run: | + # echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV + # 
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "analytics-zoo/bigdl-llm-internal"
          ref: "ipex-llm-20240308"
          token: ${{ env.github_access_token }}
          submodules: "recursive"

      # TODO: Put the bigdl-llm related install process for win gpu into an action function

      # Please uncomment it and comment the install from pypi for PR tests
      # - name: Download llm binary
      #   uses: ./.github/actions/llm/download-llm-binary

      # - name: Prepare for install bigdl-llm from source
      #   shell: bash
      #   run: |
      #     sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py
      #     sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py

      # - name: Install bigdl-llm and other related packages (install from source)
      #   shell: cmd
      #   run: |
      #     call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
      #     call conda activate igpu-perf

      #     pip install --upgrade pip
      #     pip install --upgrade wheel
      #     pip install --upgrade omegaconf pandas
      #     pip install --upgrade tiktoken einops transformers_stream_generator

      #     cd python\llm
      #     python setup.py clean --all bdist_wheel --win
      #     if not exist dist\bigdl_llm*.whl (exit /b 1)
      #     for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i

      #     pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     pip list

      #     call conda deactivate

      - name: Determine desired bigdl-llm version
        shell: bash
        run: |
          test_version_date=`date -d 'yesterday' '+%Y%m%d'`
          echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV"
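      # One possible shape for the action function asked for in the TODO above is a
      # composite action. The following is only a sketch: the path
      # .github/actions/llm/setup-igpu-perf-env/action.yml and the input name are
      # hypothetical, not files that exist in this repo.
      #
      #   name: "Set up igpu-perf conda env"
      #   inputs:
      #     python-version:
      #       description: "Python version for the igpu-perf env"
      #       default: "3.9"
      #   runs:
      #     using: "composite"
      #     steps:
      #       - shell: cmd
      #         run: |
      #           call conda create -n igpu-perf python=${{ inputs.python-version }} libuv -y
      #           call conda activate igpu-perf
      #           pip install --upgrade pip wheel omegaconf pandas
      #           pip install --upgrade tiktoken einops transformers_stream_generator
      #
      # Each copy of this job could then call it with
      # "uses: ./.github/actions/llm/setup-igpu-perf-env" instead of repeating the
      # conda/pip block inline.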
      - name: Install bigdl-llm and other related packages (install from pypi)
        shell: cmd
        run: |
          call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
          call conda activate igpu-perf

          pip install --upgrade pip
          pip install --upgrade wheel
          pip install --upgrade omegaconf pandas
          pip install --upgrade tiktoken einops transformers_stream_generator

          pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
          pip list

          call conda deactivate

      - name: Create env for html generation
        shell: cmd
        run: |
          call conda create -n html-gen python=3.9 -y
          call conda activate html-gen

          pip install pandas==1.5.3
          pip install Jinja2

          call conda deactivate

      - name: Set directory envs & fix generated csv date name
        shell: bash
        run: |
          if [ ${{ github.event_name }} == 'schedule' ]; then
            echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV"
          else
            echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV"
          fi
          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
          echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV"

          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py

      # - name: Prepare igpu perf test (32-32)
      #   shell: bash
      #   run: |
      #     # hide time info
      #     # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml

      # - name: Test on igpu (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1
      #     REM for llava
      #     set TRANSFORMERS_OFFLINE=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Mistral (32-32)
      #   shell: bash
      #   run: |
      #     sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml

      # - name: Test on igpu for Mistral (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.34.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Qwen1.5 (32-32)
      #   shell: bash
      #   run: |
      #     sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml

      # - name: Test on igpu for Qwen1.5 (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.37.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Concat csv and generate html (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate html-gen

      #     cd python\llm\dev\benchmark\all-in-one
      #     python ..\..\..\test\benchmark\concat_csv.py
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     del /q *test*.csv
      #     move *.csv %CSV_SAVE_PATH%\32-32\
      #     cd ..\..\..\test\benchmark
      #     python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH%

      #     call conda deactivate

      # # TODO: create an action function here for different input
      # # 1024-128
      # - name: Prepare igpu perf test (1024-128)
      #   shell: bash
      #   run: |
      #     sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml

      # - name: Test on igpu (1024-128)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.31.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1
      #     REM for llava
      #     set TRANSFORMERS_OFFLINE=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Mistral (1024-128)
      #   shell: bash
+ # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - # - name: Test on igpu for Mistral (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml + # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - # - name: Test on igpu for Qwen 1.5 (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + # - name: Test on igpu for Qwen 1.5 (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Concat csv and generate html (1024-128) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # 2048-256 - # - name: Prepare igpu perf test 
(2048-256) - # shell: bash - # run: | - # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml + # - name: Concat csv and generate html (1024-128) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # 2048-256 + # - name: Prepare igpu perf test (2048-256) + # shell: bash + # run: | + # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - # - name: Test on igpu (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + # - name: Test on igpu (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for Mistral (2048-256) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + # - name: Prepare igpu perf test for Mistral (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - - name: Test on igpu for Mistral (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml 
- set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (2048-256) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml + # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - - name: Test on igpu for Qwen 1.5 (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 + # - name: Test on igpu for Qwen 1.5 (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Concat csv and generate html (2048-256) - shell: cmd - run: | - call conda activate html-gen - - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\2048-256\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% - - call conda deactivate - - # load_low_bit 1024-128 - - name: Prepare igpu perf test (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml + # - name: Concat csv and generate html (2048-256) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\2048-256\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # load_low_bit 1024-128 + # - name: Prepare igpu perf test (load_low_bit 1024-128) + # 
shell: bash + # run: | + # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - - name: Test on igpu (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.31.0 + # - name: Test on igpu (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml + # - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - - name: Test on igpu for Mistral (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" 
python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml

      # - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.37.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Concat csv and generate html (load_low_bit 1024-128)
      #   shell: cmd
      #   run: |
      #     call conda activate html-gen

      #     cd python\llm\dev\benchmark\all-in-one
      #     python ..\..\..\test\benchmark\concat_csv.py
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     del /q *test*.csv
      #     move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\
      #     cd ..\..\..\test\benchmark
      #     python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH%

      #     call conda deactivate

      # - name: Upload results to ftp
      #   if: ${{ always() }}
      #   shell: cmd
      #   run: |
      #     cd %CSV_SAVE_PATH%
      #     IF "${{ github.event_name }}"=="schedule" (
      #       for %%f in (*.html) do (
      #         curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
      #       )
      #     )

      # # for test on machine when encountering error
      # # - name: Remove conda env
      # #   if: ${{ always() }}
      # #   shell: cmd
      # #   run: |
      # #     call conda env remove -n igpu-perf -y
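The patch below pins ipex-llm[xpu] to the dated build 2.1.0b20240308 instead of taking
whatever "--pre --upgrade" resolves at run time. If the pin should instead follow
yesterday's nightly automatically, one sketch (assuming nightly versions keep the
2.1.0b<YYYYMMDD> pattern used here; the step name is illustrative) would be:

      - name: Install pinned ipex-llm
        shell: bash
        run: |
          # same date logic as the "Determine desired bigdl-llm version" step
          test_version_date=`date -d 'yesterday' '+%Y%m%d'`
          pip install --pre --upgrade "ipex-llm[xpu]==2.1.0b${test_version_date}" -f https://developer.intel.com/ipex-whl-stable-xpu

A hard-coded pin keeps reruns reproducible; the date-derived pin keeps nightly runs
current. Which to prefer depends on whether the job is validating a fixed build or
tracking the latest one.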
From 8f570deac2222cee758b7125769ed3f96f4f36fb Mon Sep 17 00:00:00 2001
From: liu-shaojun
Date: Thu, 14 Mar 2024 14:36:39 +0800
Subject: [PATCH 37/40] test iGPU

---
 .../workflows/ipex_llm_performance_tests.yml | 838 +++++++++---------
 1 file changed, 421 insertions(+), 417 deletions(-)

diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml
index 1b6a487da50..0b4d497876c 100644
--- a/.github/workflows/ipex_llm_performance_tests.yml
+++ b/.github/workflows/ipex_llm_performance_tests.yml
@@ -82,7 +82,7 @@ jobs:
       - name: Install BigDL-LLM from Pypi
         shell: bash
         run: |
-          pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
+          pip install --pre --upgrade ipex-llm[xpu]==2.1.0b20240308 -f https://developer.intel.com/ipex-whl-stable-xpu
           # test_version_date=`date -d 'yesterday' '+%Y%m%d'`
           # if ! pip show bigdl-llm | grep $test_version_date; then
           #   echo "Did not install bigdl-llm with expected version $test_version_date"
           #   fi

@@ -324,494 +324,498 @@ jobs:

  llm-performance-test-on-igpu:
    # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
    # needs: llm-cpp-build # please uncomment it for PR tests
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: windows
            python-version: "3.9"
    runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu]
    env:
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    steps:
      - name: Set access token
        run: |
          echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "analytics-zoo/bigdl-llm-internal"
          ref: "ipex-llm-20240308-test"
          token: ${{ env.github_access_token }}
          submodules: "recursive"

      # TODO: Put the bigdl-llm related install process for win gpu into an action function

      # Please uncomment it and comment the install from pypi for PR tests
      # - name: Download llm binary
      #   uses: ./.github/actions/llm/download-llm-binary

      # - name: Prepare for install bigdl-llm from source
      #   shell: bash
      #   run: |
      #     sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py
      #     sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py

      # - name: Install bigdl-llm and other related packages (install from source)
      #   shell: cmd
      #   run: |
      #     call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
      #     call conda activate igpu-perf

      #     pip install --upgrade pip
      #     pip install --upgrade wheel
      #     pip install --upgrade omegaconf pandas
      #     pip install --upgrade tiktoken einops transformers_stream_generator

      #     cd python\llm
      #     python setup.py clean --all bdist_wheel --win
      #     if not exist dist\bigdl_llm*.whl (exit /b 1)
      #     for %%i in (dist\bigdl_llm*.whl) do set whl_name=%%i

      #     pip install --pre --upgrade %whl_name%[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     pip list

      #     call conda deactivate

      - name: Determine desired bigdl-llm version
        shell: bash
        run: |
          test_version_date=`date -d 'yesterday' '+%Y%m%d'`
          echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV"

      - name: Install bigdl-llm and other related packages (install from pypi)
        shell: cmd
        run: |
          call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y
          call conda activate igpu-perf

          pip install --upgrade pip
          pip install --upgrade wheel
          pip install --upgrade omegaconf pandas
          pip install --upgrade tiktoken einops transformers_stream_generator

          pip install --pre --upgrade ipex-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
          pip list

          call conda deactivate

      - name: Create env for html generation
        shell: cmd
        run: |
          call conda create -n html-gen python=3.9 -y
          call conda activate html-gen

          pip install pandas==1.5.3
          pip install Jinja2

          call conda deactivate

      - name: Set directory envs & fix generated csv date name
        shell: bash
        run: |
          if [ ${{ github.event_name }} == 'schedule' ]; then
            echo "CSV_SAVE_PATH=${CSV_NIGHTLY_PATH}" >> "$GITHUB_ENV"
          else
            echo "CSV_SAVE_PATH=${CSV_PR_PATH}" >> "$GITHUB_ENV"
          fi
          date_for_test_version=$(date -d yesterday +%Y-%m-%d)
          echo "LOG_FILE=${date_for_test_version}_output.txt" >> "$GITHUB_ENV"

          sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py

      # - name: Prepare igpu perf test (32-32)
      #   shell: bash
      #   run: |
      #     # hide time info
      #     # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml

      # - name: Test on igpu (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1
      #     REM for llava
      #     set TRANSFORMERS_OFFLINE=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Mistral (32-32)
      #   shell: bash
      #   run: |
      #     sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml

      # - name: Test on igpu for Mistral (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.34.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Qwen1.5 (32-32)
      #   shell: bash
      #   run: |
      #     sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml

      # - name: Test on igpu for Qwen1.5 (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.37.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Concat csv and generate html (32-32)
      #   shell: cmd
      #   run: |
      #     call conda activate html-gen

      #     cd python\llm\dev\benchmark\all-in-one
      #     python ..\..\..\test\benchmark\concat_csv.py
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     del /q *test*.csv
      #     move *.csv %CSV_SAVE_PATH%\32-32\
      #     cd ..\..\..\test\benchmark
      #     python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\
      #     if %ERRORLEVEL% neq 0 (exit /b 1)
      #     move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH%

      #     call conda deactivate

      # # TODO: create an action function here for different input
      # # 1024-128
      # - name: Prepare igpu perf test (1024-128)
      #   shell: bash
      #   run: |
      #     sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml

      # - name: Test on igpu (1024-128)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.31.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1
      #     REM for llava
      #     set TRANSFORMERS_OFFLINE=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml
      #     set PYTHONIOENCODING=utf-8
      #     python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1
      #     if %ERRORLEVEL% neq 0 (exit /b 1)

      #     call conda deactivate

      # - name: Prepare igpu perf test for Mistral (1024-128)
      #   shell: bash
      #   run: |
      #     sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
      #     sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml

      # - name: Test on igpu for Mistral (1024-128)
      #   shell: cmd
      #   run: |
      #     call conda activate igpu-perf
      #     pip install transformers==4.34.0

      #     call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
      #     set SYCL_CACHE_PERSISTENT=1
      #     set BIGDL_LLM_XMX_DISABLED=1

      #     cd python\llm\dev\benchmark\all-in-one
      #     move
..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml + # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - # - name: Test on igpu for Qwen 1.5 (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + # - name: Test on igpu for Qwen 1.5 (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Concat csv and generate html (1024-128) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # 2048-256 - # - name: Prepare igpu perf test (2048-256) - # shell: bash - # run: | - # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml + # - name: Concat csv and generate html (1024-128) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # 2048-256 + # - name: Prepare igpu perf test (2048-256) + # shell: bash + # run: | + # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' 
python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - # - name: Test on igpu (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + # - name: Test on igpu (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Mistral (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + # - name: Prepare igpu perf test for Mistral (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - # - name: Test on igpu for Mistral (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + # - name: Test on igpu for Mistral (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml + # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" 
python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - # - name: Test on igpu for Qwen 1.5 (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + # - name: Test on igpu for Qwen 1.5 (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + # call conda deactivate - # - name: Concat csv and generate html (2048-256) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\2048-256\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # load_low_bit 1024-128 - # - name: Prepare igpu perf test (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml + # - name: Concat csv and generate html (2048-256) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\2048-256\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # load_low_bit 1024-128 + - name: Prepare igpu perf test (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - # - name: Test on igpu (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 + # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml + + - name: Test on igpu (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.31.0 - # call 
"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml + - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - # - name: Test on igpu for Mistral (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + - name: Test on igpu for Mistral (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml + - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml - # - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip 
install transformers==4.37.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Concat csv and generate html (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # - name: Upload results to ftp - # if: ${{ always() }} - # shell: cmd - # run: | - # cd %CSV_SAVE_PATH% - # IF "${{ github.event_name }}"=="schedule" ( - # for %%f in (*.html) do ( - # curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% - # ) - # ) - - # # for test on machine when encountering error - # # - name: Remove conda env - # # if: ${{ always() }} - # # shell: cmd - # # run: | - # # call conda env remove -n igpu-perf -y + - name: Concat csv and generate html (load_low_bit 1024-128) + shell: cmd + run: | + call conda activate html-gen + + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\1024-128_loadlowbit\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_loadlowbit\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\1024-128_loadlowbit\*.html %CSV_SAVE_PATH% + + call conda deactivate + + - name: Upload results to ftp + if: ${{ always() }} + shell: cmd + run: | + cd %CSV_SAVE_PATH% + IF "${{ github.event_name }}"=="schedule" ( + for %%f in (*.html) do ( + curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + ) + ) + + # for test on machine when encountering error + # - name: Remove conda env + # if: ${{ always() }} + # shell: cmd + # run: | + # call conda env remove -n igpu-perf -y From d9e3667257e47d4efe3feac15dd860f0c60c6e6c Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 15:32:31 +0800 Subject: [PATCH 38/40] test --- .github/workflows/ipex_llm_performance_tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index 0b4d497876c..d63730b199e 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -710,7 +710,7 @@ jobs: shell: bash run: | sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 
's/{api}-results-{today}.csv/1024-128-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py @@ -732,7 +732,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + python run.py if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -756,7 +756,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + python run.py if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -780,7 +780,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + python run.py if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate From 65c3c863526a72f817d92f76ccc2c6f5227b5a49 Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Thu, 14 Mar 2024 15:47:24 +0800 Subject: [PATCH 39/40] update --- .github/workflows/ipex_llm_performance_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index d63730b199e..5d2c22a3fc6 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -48,7 +48,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" + ref: "ipex-llm-20240308-test" token: ${{ env.github_access_token }} submodules: "recursive" From 66bfd5b4806cfe3f08e56190bb2919a9e739479a Mon Sep 17 00:00:00 2001 From: liu-shaojun Date: Sat, 16 Mar 2024 08:41:49 +0800 Subject: [PATCH 40/40] test on ipex-llm-20240315 branch --- .github/workflows/ipex-llm-nightly-test.yml | 4 +- .github/workflows/ipex_llm_example_tests.yml | 2 +- .../workflows/ipex_llm_performance_tests.yml | 794 +++++++++--------- .github/workflows/ipex_llm_unit_tests.yml | 4 +- 4 files changed, 400 insertions(+), 404 deletions(-) diff --git a/.github/workflows/ipex-llm-nightly-test.yml b/.github/workflows/ipex-llm-nightly-test.yml index 82de7ac2c9a..2418845a953 100644 --- a/.github/workflows/ipex-llm-nightly-test.yml +++ b/.github/workflows/ipex-llm-nightly-test.yml @@ -12,8 +12,8 @@ permissions: on: schedule: - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China - # pull_request: - # branches: [main] + pull_request: + branches: [main] # paths: # - ".github/workflows/llm-nightly-test.yml" # - ".github/actions/llm/setup-llm-env/action.yml" diff --git a/.github/workflows/ipex_llm_example_tests.yml b/.github/workflows/ipex_llm_example_tests.yml index e2d16f4264d..51a93285297 100644 --- a/.github/workflows/ipex_llm_example_tests.yml +++ b/.github/workflows/ipex_llm_example_tests.yml @@ -51,7 +51,7 @@ jobs: - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" + ref: "ipex-llm-20240315" token: ${{ 
env.github_access_token }} submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/ipex_llm_performance_tests.yml b/.github/workflows/ipex_llm_performance_tests.yml index 5d2c22a3fc6..45d4eeb98b5 100644 --- a/.github/workflows/ipex_llm_performance_tests.yml +++ b/.github/workflows/ipex_llm_performance_tests.yml @@ -48,7 +48,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308-test" + ref: "ipex-llm-20240315" token: ${{ env.github_access_token }} submodules: "recursive" @@ -165,164 +165,164 @@ jobs: # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ # fi - # llm-performance-test-on-spr: - # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # # needs: llm-cpp-build # please uncomment it for PR tests - # strategy: - # fail-fast: false - # matrix: - # python-version: ["3.9"] - # runs-on: [self-hosted, llm, spr01-perf] - # env: - # OMP_NUM_THREADS: 16 - # THREAD_NUM: 16 - # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - # steps: - # - name: Set access token - # run: | - # echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" - # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - # with: - # repository: "analytics-zoo/bigdl-llm-internal" - # ref: "ipex-llm-20240308" - # token: ${{ env.github_access_token }} - # submodules: "recursive" - # - name: Set up Python ${{ matrix.python-version }} - # uses: actions/setup-python@v4 - # with: - # python-version: ${{ matrix.python-version }} - - # - name: Install dependencies - # shell: bash - # run: | - # python -m pip install --upgrade pip - # python -m pip install --upgrade wheel - # python -m pip install --upgrade omegaconf - # python -m pip install --upgrade pandas - # python -m pip install --upgrade einops - # python -m pip install --upgrade tiktoken - # python -m pip install --upgrade transformers_stream_generator - - # # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests - # # - name: Download llm binary - # # uses: ./.github/actions/llm/download-llm-binary - - # # - name: Run LLM install (all) test - # # uses: ./.github/actions/llm/setup-llm-env - - # - name: Install BigDL-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu - # # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # # if ! 
pip show bigdl-llm | grep $test_version_date; then - # # echo "Did not install bigdl-llm with excepted version $test_version_date" - # # exit 1 - # # fi - - # - name: Test on cpu - # shell: bash - # run: | - # date_for_test_version=$(date -d yesterday +%Y-%m-%d) - # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - # mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # export http_proxy=${HTTP_PROXY} - # export https_proxy=${HTTPS_PROXY} - # source ipex-llm-init -t - # export OMP_NUM_THREADS=48 - # # hide time info - # sed -i 's/str(end - st)/"xxxxxx"/g' run.py - # python run.py - # cp ./*.csv /models/nightly_perf_cpu - # cd ../../../test/benchmark - # python -m pip install pandas==1.5.3 - # python csv_to_html.py -f /models/nightly_perf_cpu - # cd /models/nightly_perf_cpu - # for f in *.html; do - # curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/ - # done - - # llm-performance-test-on-core: - # # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # # needs: llm-cpp-build # please uncomment it for PR tests - # strategy: - # fail-fast: false - # matrix: - # include: - # - os: windows - # platform: dp - # python-version: "3.9" - # # - os: windows - # # platform: lp - # # python-version: "3.9" - # runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"] - # env: - # ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - # CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/ - # steps: - # - name: Set access token - # run: | - # echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV - # - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 - # with: - # repository: "analytics-zoo/bigdl-llm-internal" - # ref: "ipex-llm-20240308" - # token: ${{ env.github_access_token }} - # submodules: "recursive" - - # - name: Set up Python ${{ matrix.python-version }} - # uses: actions/setup-python@v4 - # with: - # python-version: ${{ matrix.python-version }} - - # - name: Install dependencies - # shell: bash - # run: | - # python -m pip install --upgrade pip - # python -m pip install --upgrade wheel - # python -m pip install --upgrade omegaconf pandas - # python -m pip install --upgrade tiktoken einops transformers_stream_generator + llm-performance-test-on-spr: + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests + strategy: + fail-fast: false + matrix: + python-version: ["3.9"] + runs-on: [self-hosted, llm, spr01-perf] + env: + OMP_NUM_THREADS: 16 + THREAD_NUM: 16 + ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + steps: + - name: Set access token + run: | + echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + with: + repository: "analytics-zoo/bigdl-llm-internal" + ref: "ipex-llm-20240315" + token: ${{ env.github_access_token }} + submodules: "recursive" + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + 
+ - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade wheel + python -m pip install --upgrade omegaconf + python -m pip install --upgrade pandas + python -m pip install --upgrade einops + python -m pip install --upgrade tiktoken + python -m pip install --upgrade transformers_stream_generator + + # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + + - name: Install BigDL-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show bigdl-llm | grep $test_version_date; then + # echo "Did not install bigdl-llm with excepted version $test_version_date" + # exit 1 + # fi + + - name: Test on cpu + shell: bash + run: | + date_for_test_version=$(date -d yesterday +%Y-%m-%d) + sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + mv python/llm/test/benchmark/cpu-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + export http_proxy=${HTTP_PROXY} + export https_proxy=${HTTPS_PROXY} + source ipex-llm-init -t + export OMP_NUM_THREADS=48 + # hide time info + sed -i 's/str(end - st)/"xxxxxx"/g' run.py + python run.py + cp ./*.csv /models/nightly_perf_cpu + cd ../../../test/benchmark + python -m pip install pandas==1.5.3 + python csv_to_html.py -f /models/nightly_perf_cpu + cd /models/nightly_perf_cpu + for f in *.html; do + curl -T "$f" ${LLM_FTP_URL}/llm/nightly_perf/nightly_perf_cpu/ + done + + llm-performance-test-on-core: + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests + strategy: + fail-fast: false + matrix: + include: + - os: windows + platform: dp + python-version: "3.9" + # - os: windows + # platform: lp + # python-version: "3.9" + runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-core, "${{ matrix.platform }}"] + env: + ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + CSV_SAVE_PATH: ${{ github.event.schedule && 'D:/action-runners/nightly_perf_core_' || 'D:/action-runners/pr_perf_core_' }}${{ matrix.platform }}/ + steps: + - name: Set access token + run: | + echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $Env:GITHUB_ENV + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 + with: + repository: "analytics-zoo/bigdl-llm-internal" + ref: "ipex-llm-20240315" + token: ${{ env.github_access_token }} + submodules: "recursive" + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade wheel + python -m pip install --upgrade omegaconf pandas + python -m pip install --upgrade tiktoken einops transformers_stream_generator - # # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests - # # - name: Download llm binary - # # uses: ./.github/actions/llm/download-llm-binary - - # # - name: Run LLM install (all) test - # # uses: 
./.github/actions/llm/setup-llm-env - - # - name: Install BigDL-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu - # # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # # if ! pip show bigdl-llm | grep $test_version_date; then - # # echo "Did not install bigdl-llm with excepted version $test_version_date" - # # exit 1 - # # fi - - # - name: Test on core ${{ matrix.platform }} - # shell: bash - # run: | - # date_for_test_version=$(date -d yesterday +%Y-%m-%d) - # sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - - # mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # export http_proxy=${HTTP_PROXY} - # export https_proxy=${HTTPS_PROXY} - # # hide time info - # sed -i 's/str(end - st)/"xxxxxx"/g' run.py - # python run.py - # cp ./*.csv $CSV_SAVE_PATH - # cd ../../../test/benchmark - # python -m pip install pandas==1.5.3 - # python csv_to_html.py -f $CSV_SAVE_PATH - # cd ../../dev/benchmark/all-in-one/ - # if [ ${{ github.event.schedule}} ]; then - # curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ - # fi + # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + + - name: Install BigDL-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[all] -f https://developer.intel.com/ipex-whl-stable-xpu + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show bigdl-llm | grep $test_version_date; then + # echo "Did not install bigdl-llm with excepted version $test_version_date" + # exit 1 + # fi + + - name: Test on core ${{ matrix.platform }} + shell: bash + run: | + date_for_test_version=$(date -d yesterday +%Y-%m-%d) + sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py + + mv python/llm/test/benchmark/core-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + export http_proxy=${HTTP_PROXY} + export https_proxy=${HTTPS_PROXY} + # hide time info + sed -i 's/str(end - st)/"xxxxxx"/g' run.py + python run.py + cp ./*.csv $CSV_SAVE_PATH + cd ../../../test/benchmark + python -m pip install pandas==1.5.3 + python csv_to_html.py -f $CSV_SAVE_PATH + cd ../../dev/benchmark/all-in-one/ + if [ ${{ github.event.schedule}} ]; then + curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/ + fi llm-performance-test-on-igpu: # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests @@ -343,7 +343,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308-test" + ref: "ipex-llm-20240315" token: ${{ env.github_access_token }} submodules: "recursive" @@ -427,296 +427,292 @@ jobs: sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - # - name: Prepare igpu perf test (32-32) - # shell: bash - # run: | - # # hide time info - # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 
's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml + - name: Prepare igpu perf test (32-32) + shell: bash + run: | + # hide time info + # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32.yaml - # - name: Test on igpu (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Test on igpu (32-32) + shell: cmd + run: | + call conda activate igpu-perf + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # call conda deactivate + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Prepare igpu perf test for Mistral (32-32) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml + call conda deactivate - # - name: Test on igpu for Mistral (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + - name: Prepare igpu perf test for Mistral (32-32) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + - name: Test on igpu for Mistral (32-32) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # call conda deactivate + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Prepare igpu perf test for Qwen1.5 (32-32) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml + call conda deactivate - # - name: Test on 
igpu for Qwen1.5 (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + - name: Prepare igpu perf test for Qwen1.5 (32-32) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + - name: Test on igpu for Qwen1.5 (32-32) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.37.0 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # call conda deactivate + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Concat csv and generate html (32-32) - # shell: cmd - # run: | - # call conda activate html-gen + call conda deactivate - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\32-32\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% + - name: Concat csv and generate html (32-32) + shell: cmd + run: | + call conda activate html-gen - # call conda deactivate + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\32-32\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\32-32\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\32-32\*.html %CSV_SAVE_PATH% - # # TODO: create a action function here for different input - # # 1024-128 - # - name: Prepare igpu perf test (1024-128) - # shell: bash - # run: | - # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml + call conda deactivate - # - name: Test on igpu (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + # TODO: create a action function here for different input + # 1024-128 + - name: Prepare igpu perf test (1024-128) + shell: bash + run: | + sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed 
-i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml - # call conda deactivate + - name: Test on igpu (1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.31.0 - # - name: Prepare igpu perf test for Mistral (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # - name: Test on igpu for Mistral (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call conda deactivate - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Prepare igpu perf test for Mistral (1024-128) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - # call conda deactivate + - name: Test on igpu for Mistral (1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # - name: Prepare igpu perf test for Qwen 1.5 (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # - name: Test on igpu for Qwen 1.5 (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call conda deactivate - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Prepare igpu perf test for Qwen 1.5 (1024-128) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - # call conda deactivate + - name: Test on igpu for Qwen 1.5 (1024-128) + shell: cmd + run: | + call conda activate igpu-perf + pip install 
transformers==4.37.0 - # - name: Concat csv and generate html (1024-128) - # shell: cmd - # run: | - # call conda activate html-gen + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # # 2048-256 - # - name: Prepare igpu perf test (2048-256) - # shell: bash - # run: | - # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml + - name: Concat csv and generate html (1024-128) + shell: cmd + run: | + call conda activate html-gen - # - name: Test on igpu (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.31.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\1024-128\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\1024-128\*.html %CSV_SAVE_PATH% - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Mistral (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + # 2048-256 + - name: Prepare igpu perf test (2048-256) + shell: bash + run: | + sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - # - name: Test on igpu for Mistral (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 + - name: Test on igpu (2048-256) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.31.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # cd 
python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for Qwen 1.5 (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml + - name: Prepare igpu perf test for Mistral (2048-256) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - # - name: Test on igpu for Qwen 1.5 (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 + - name: Test on igpu for Mistral (2048-256) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.34.0 - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Concat csv and generate html (2048-256) - # shell: cmd - # run: | - # call conda activate html-gen + - name: Prepare igpu perf test for Qwen 1.5 (2048-256) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\2048-256\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + - name: Test on igpu for Qwen 1.5 (2048-256) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.37.0 - # call conda deactivate + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_437.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + + call conda deactivate + + - name: Concat csv and generate html (2048-256) + shell: cmd + run: | + 
call conda activate html-gen + + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\2048-256\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\2048-256\*.html %CSV_SAVE_PATH% + + call conda deactivate # load_low_bit 1024-128 - name: Prepare igpu perf test (load_low_bit 1024-128) shell: bash run: | - sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{api}-results-{today}.csv/1024-128-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - # sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - - name: Test on igpu (load_low_bit 1024-128) shell: cmd run: | @@ -732,7 +728,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -756,7 +752,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -780,7 +776,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_437.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py + python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -818,4 +814,4 @@ jobs: # if: ${{ always() }} # shell: cmd # run: | - # call conda env remove -n igpu-perf -y + # call conda env remove -n igpu-perf -y \ No newline at end of file diff --git a/.github/workflows/ipex_llm_unit_tests.yml b/.github/workflows/ipex_llm_unit_tests.yml index 11a645eea13..a43a443827a 100644 --- a/.github/workflows/ipex_llm_unit_tests.yml +++ b/.github/workflows/ipex_llm_unit_tests.yml @@ -149,7 +149,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" + ref: "ipex-llm-20240315" token: ${{ env.github_access_token }} submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} @@ -302,7 +302,7 @@ jobs: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 with: repository: "analytics-zoo/bigdl-llm-internal" - ref: "ipex-llm-20240308" + ref: "ipex-llm-20240315" token: ${{ env.github_access_token }} submodules: "recursive" - name: Set up Python ${{ matrix.python-version }}