From f9d0107b99a9e051e27e099f1bad290d7e970dad Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 00:30:05 -0700 Subject: [PATCH 01/57] update unit test --- .github/actions/llm/setup-llm-env/action.yml | 1 + .github/workflows/llm_unit_tests.yml | 51 +++++++++++-------- .../test/inference/test_transformers_api.py | 4 +- .../test/langchain/test_transformers_api.py | 2 +- .../llm/test/run-llm-inference-tests-gpu.sh | 6 +-- python/llm/test/run-llm-inference-tests.sh | 4 -- 6 files changed, 38 insertions(+), 30 deletions(-) diff --git a/.github/actions/llm/setup-llm-env/action.yml b/.github/actions/llm/setup-llm-env/action.yml index 4b25ea0c401..4d0b7550f74 100644 --- a/.github/actions/llm/setup-llm-env/action.yml +++ b/.github/actions/llm/setup-llm-env/action.yml @@ -42,3 +42,4 @@ runs: pip install pytest bash python/llm/test/run-llm-install-tests.sh fi + pip install transformers==4.36.2 diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 5eb5b55e31a..5233a7f879c 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -77,6 +77,7 @@ jobs: run: | echo "DATASET_DIR=${{ github.workspace }}/../llm/datasets" >> "$GITHUB_ENV" echo "ORIGIN_DIR=${{ github.workspace }}/../llm/origin-models" >> "$GITHUB_ENV" + echo "ORIGIN_DIR_436=${{ github.workspace }}/../llm/origin-models-4.36" >> "$GITHUB_ENV" echo "INT4_CKPT_DIR=${{ github.workspace }}/../llm/converted-models" >> "$GITHUB_ENV" - name: Create model directories shell: bash @@ -87,6 +88,9 @@ jobs: if [ ! -d $ORIGIN_DIR ]; then mkdir -p $ORIGIN_DIR fi + if [ ! -d ORIGIN_DIR_436 ]; then + mkdir -p ORIGIN_DIR_436 + fi if [ ! -d $INT4_CKPT_DIR ]; then mkdir -p $INT4_CKPT_DIR fi @@ -98,7 +102,7 @@ jobs: echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV" echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloom-7b1" >> "$GITHUB_ENV" - echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV" + echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR_436}/chatglm2-6b" >> "$GITHUB_ENV" echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV" echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV" @@ -157,8 +161,8 @@ jobs: # fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." - echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR" - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 fi if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..." @@ -210,23 +214,28 @@ jobs: - name: Run LLM cli test (Windows) if: runner.os == 'Windows' uses: ./.github/actions/llm/cli-test-windows - - name: Run LLM inference test +# - name: Run LLM inference test +# shell: bash +# run: | +# python -m pip install einops datasets librosa openai-whisper +# bash python/llm/test/run-llm-inference-tests.sh + - name: Run LLM inference test for 4.36 shell: bash run: | python -m pip install einops datasets librosa openai-whisper bash python/llm/test/run-llm-inference-tests.sh - - name: Run LLM langchain test - shell: bash - run: | - pip install -U langchain==0.0.184 - pip install -U chromadb==0.3.25 - pip install -U pandas==2.0.3 - bash python/llm/test/run-llm-langchain-tests.sh +# - name: Run LLM langchain test +# shell: bash +# run: | +# pip install -U langchain==0.0.184 +# pip install -U chromadb==0.3.25 +# pip install -U pandas==2.0.3 +# bash python/llm/test/run-llm-langchain-tests.sh - name: Run LLM llamaindex test shell: bash run: | pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface - pip install transformers==4.36.0 +# pip install transformers==4.36.0 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests.sh - name: Run sentence-transformers uninstallation @@ -255,12 +264,12 @@ jobs: echo "SPEECH_DATASET_PATH=${ORIGIN_DIR}/../datasets/librispeech_asr_dummy" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" - echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV" + echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR_436}/chatglm2-6b" >> "$GITHUB_ENV" echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV" - echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR}/mpt-7b-chat" >> "$GITHUB_ENV" + echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR_436}/mpt-7b-chat" >> "$GITHUB_ENV" echo "WHISPER_TINY_ORIGIN_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV" - echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" + echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR_436}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV" echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV" - name: Checkout repo @@ -310,7 +319,7 @@ jobs: fi if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//chatglm2-6b -P $ORIGIN_DIR_436 fi if [ ! -d $FALCON_7B_ORIGIN_PATH ]; then echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..." @@ -318,7 +327,7 @@ jobs: fi if [ ! -d $MPT_7B_ORIGIN_PATH ]; then echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//mpt-7b-chat -P $ORIGIN_DIR_436 fi if [ ! -d $WHISPER_TINY_ORIGIN_PATH ]; then echo "Directory $WHISPER_TINY_ORIGIN_PATH not found. Downloading from FTP server..." @@ -345,7 +354,7 @@ jobs: fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//Baichuan2-7B-Chat -P $ORIGIN_DIR_436 fi if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..." @@ -363,8 +372,8 @@ jobs: fi python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator bash python/llm/test/run-llm-inference-tests-gpu.sh - python -m pip install transformers==4.34.0 - bash python/llm/test/run-llm-inference-tests-gpu-434.sh +# python -m pip install transformers==4.34.0 +# bash python/llm/test/run-llm-inference-tests-gpu-434.sh - name: Run LLM example tests shell: bash @@ -410,7 +419,7 @@ jobs: pip install --pre --upgrade ipex-llm[xpu_2.0] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ source /home/arda/intel/oneapi/setvars.sh fi - pip install transformers==4.36.0 +# pip install transformers==4.36.0 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests-gpu.sh - name: Run sentence-transformers uninstallation diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index 1a72801cc1a..ea54c6a29cc 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -116,6 +116,7 @@ def test_transformers_chatglm_for_causallm(self): ]) @pytest.mark.parametrize('Model, Tokenizer, model_path',[ (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')), + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')), ]) def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) @@ -143,7 +144,8 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), - (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt) + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt), + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) ]) def test_optimize_model(Model, Tokenizer, model_path, prompt): diff --git a/python/llm/test/langchain/test_transformers_api.py b/python/llm/test/langchain/test_transformers_api.py index cbaaa1e0ba7..61d30051b6f 100644 --- a/python/llm/test/langchain/test_transformers_api.py +++ b/python/llm/test/langchain/test_transformers_api.py @@ -38,7 +38,7 @@ class Test_Langchain_Transformers_API(TestCase): def setUp(self): self.auto_model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH') - self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') + # self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') self.llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH') self.bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH') thread_num = os.environ.get('THREAD_NUM') diff --git a/python/llm/test/run-llm-inference-tests-gpu.sh b/python/llm/test/run-llm-inference-tests-gpu.sh index ea1abb519f4..5e48c0df876 100644 --- a/python/llm/test/run-llm-inference-tests-gpu.sh +++ b/python/llm/test/run-llm-inference-tests-gpu.sh @@ -21,9 +21,9 @@ pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api.py -v -s pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_layernorm.py -v -s export BIGDL_LLM_XMX_DISABLED=1 pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_final_logits.py -v -s -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_attention.py -v -s -k "not Mistral" -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_mlp.py -v -s -k "not Mistral" -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_RMSNorm.py -v -s -k "not Mistral" +pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_attention.py -v -s +pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_mlp.py -v -s +pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_RMSNorm.py -v -s unset BIGDL_LLM_XMX_DISABLED now=$(date "+%s") diff --git a/python/llm/test/run-llm-inference-tests.sh b/python/llm/test/run-llm-inference-tests.sh index e53528dbb56..d3c3c0690ef 100644 --- a/python/llm/test/run-llm-inference-tests.sh +++ b/python/llm/test/run-llm-inference-tests.sh @@ -18,10 +18,6 @@ export OMP_NUM_THREADS=$THREAD_NUM python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/test_transformers_api.py -v python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/test_optimize_model_api.py -v -python -m pip install transformers==4.34.0 -python -m pytest -s ${LLM_INFERENCE_TEST_DIR}/test_transformesr_api_434.py -v -python -m pip install transformers==4.31.0 - now=$(date "+%s") time=$((now-start)) From a86c35fd610dbb6f2947ddb0d4619fd3f3c2c886 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 00:33:28 -0700 Subject: [PATCH 02/57] update --- .github/workflows/llm_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 5233a7f879c..7e2e796ba87 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -161,7 +161,7 @@ jobs: # fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." - echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR" + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436" wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 fi if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then From 8a6e0f24f6fea51add9c39fd7b3c6a47c8ba48cc Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 00:38:39 -0700 Subject: [PATCH 03/57] update --- .../test/inference/test_transformers_api.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index ea54c6a29cc..8b208bdd3c1 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -52,23 +52,23 @@ def test_transformers_auto_model_int4(self): res = 'Paris' in output_str self.assertTrue(res) - def test_transformers_auto_model_for_causal_lm_int4(self): - model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') - tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) - input_str = 'def hello():\n print("hello world")\n' - model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) - with torch.inference_mode(): - - st = time.time() - input_ids = tokenizer.encode(input_str, return_tensors="pt") - output = model.generate(input_ids, do_sample=False, max_new_tokens=32) - output_str = tokenizer.decode(output[0], skip_special_tokens=True) - end = time.time() - print('Prompt:', input_str) - print('Output:', output_str) - print(f'Inference time: {end-st} s') - res = '\nhello()' in output_str - self.assertTrue(res) + # def test_transformers_auto_model_for_causal_lm_int4(self): + # model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') + # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + # input_str = 'def hello():\n print("hello world")\n' + # model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) + # with torch.inference_mode(): + # + # st = time.time() + # input_ids = tokenizer.encode(input_str, return_tensors="pt") + # output = model.generate(input_ids, do_sample=False, max_new_tokens=32) + # output_str = tokenizer.decode(output[0], skip_special_tokens=True) + # end = time.time() + # print('Prompt:', input_str) + # print('Output:', output_str) + # print(f'Inference time: {end-st} s') + # res = '\nhello()' in output_str + # self.assertTrue(res) def test_transformers_auto_model_for_speech_seq2seq_int4(self): @@ -144,7 +144,7 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), - (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt), + # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) ]) From d658968e07dca5ad768a118a8405b724cc5cf327 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 00:49:59 -0700 Subject: [PATCH 04/57] update --- .github/workflows/llm_unit_tests.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 7e2e796ba87..c85cba9091f 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -224,18 +224,18 @@ jobs: run: | python -m pip install einops datasets librosa openai-whisper bash python/llm/test/run-llm-inference-tests.sh -# - name: Run LLM langchain test -# shell: bash -# run: | -# pip install -U langchain==0.0.184 -# pip install -U chromadb==0.3.25 -# pip install -U pandas==2.0.3 -# bash python/llm/test/run-llm-langchain-tests.sh + # - name: Run LLM langchain test + # shell: bash + # run: | + # pip install -U langchain==0.0.184 + # pip install -U chromadb==0.3.25 + # pip install -U pandas==2.0.3 + # bash python/llm/test/run-llm-langchain-tests.sh - name: Run LLM llamaindex test shell: bash run: | pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface -# pip install transformers==4.36.0 + # pip install transformers==4.36.0 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests.sh - name: Run sentence-transformers uninstallation @@ -372,8 +372,8 @@ jobs: fi python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator bash python/llm/test/run-llm-inference-tests-gpu.sh -# python -m pip install transformers==4.34.0 -# bash python/llm/test/run-llm-inference-tests-gpu-434.sh + # python -m pip install transformers==4.34.0 + # bash python/llm/test/run-llm-inference-tests-gpu-434.sh - name: Run LLM example tests shell: bash @@ -419,7 +419,7 @@ jobs: pip install --pre --upgrade ipex-llm[xpu_2.0] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ source /home/arda/intel/oneapi/setvars.sh fi -# pip install transformers==4.36.0 + # pip install transformers==4.36.0 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests-gpu.sh - name: Run sentence-transformers uninstallation From 66639dce4202e1f092a2c78c42c8e5846ac70471 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 00:53:22 -0700 Subject: [PATCH 05/57] update --- .github/workflows/llm_unit_tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index c85cba9091f..23b061b585f 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -214,11 +214,11 @@ jobs: - name: Run LLM cli test (Windows) if: runner.os == 'Windows' uses: ./.github/actions/llm/cli-test-windows -# - name: Run LLM inference test -# shell: bash -# run: | -# python -m pip install einops datasets librosa openai-whisper -# bash python/llm/test/run-llm-inference-tests.sh + # - name: Run LLM inference test + # shell: bash + # run: | + # python -m pip install einops datasets librosa openai-whisper + # bash python/llm/test/run-llm-inference-tests.sh - name: Run LLM inference test for 4.36 shell: bash run: | From e77cee4a7b8b7dd5a9c258abe997666f5e3c76eb Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 23 Apr 2024 11:40:32 -0700 Subject: [PATCH 06/57] update --- .github/workflows/llm_unit_tests.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 23b061b585f..2e776a3d8e9 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -161,8 +161,8 @@ jobs: # fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." - echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436" - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 + echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436" + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 fi if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..." @@ -256,6 +256,16 @@ jobs: # THREAD_NUM: 16 ANALYTICS_ZOO_ROOT: ${{ github.workspace }} steps: + - name: Set model directories for 4.36 + shell: bash + run: | + echo "ORIGIN_DIR_436=${{ github.workspace }}/../llm/origin-models-4.36" >> "$GITHUB_ENV" + - name: Create model directories + shell: bash + run: | + if [ ! -d ORIGIN_DIR_436 ]; then + mkdir -p ORIGIN_DIR_436 + fi - name: Set environment variables shell: bash run: | @@ -319,7 +329,7 @@ jobs: fi if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//chatglm2-6b -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 fi if [ ! -d $FALCON_7B_ORIGIN_PATH ]; then echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..." @@ -327,7 +337,7 @@ jobs: fi if [ ! -d $MPT_7B_ORIGIN_PATH ]; then echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//mpt-7b-chat -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/mpt-7b-chat -P $ORIGIN_DIR_436 fi if [ ! -d $WHISPER_TINY_ORIGIN_PATH ]; then echo "Directory $WHISPER_TINY_ORIGIN_PATH not found. Downloading from FTP server..." @@ -354,7 +364,7 @@ jobs: fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/updated_for_4.36//Baichuan2-7B-Chat -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/Baichuan2-7B-Chat -P $ORIGIN_DIR_436 fi if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..." From f1d694469c68ebd74bbcaca7dc1d8d242473051e Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 24 Apr 2024 15:24:42 -0700 Subject: [PATCH 07/57] fix gpu attention test --- .../test/inference_gpu/test_transformers_api_attention.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/llm/test/inference_gpu/test_transformers_api_attention.py b/python/llm/test/inference_gpu/test_transformers_api_attention.py index 0990f8ad4b9..83f7aaebfc8 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_attention.py +++ b/python/llm/test/inference_gpu/test_transformers_api_attention.py @@ -104,8 +104,8 @@ def replace_forward_hook(module, input, output, layer_name): if isinstance(t1, torch.Tensor) and isinstance(t2, torch.Tensor): # 'attn_output' is of type torch.Tensor. attn_output_diff.append(t1 - t2) - else: - # 'past_key_value'is of type tuple as default. + elif isinstance(t1, tuple) and isinstance(t2, tuple): + # if 'past_key_value'is of type tuple for i, (t3, t4) in enumerate(zip(t1, t2)): if model.config.architectures[0] == "ChatGLMModel" and \ hasattr(model.config, 'padded_vocab_size') and \ @@ -114,6 +114,10 @@ def replace_forward_hook(module, input, output, layer_name): # We need to narrow it here. t4 = t4[:, :, 15:17, :] attn_output_diff.append(t3 - t4) + else: + # if 'past_key_value'is of type Cache, get last layer cache pair (key, value) + attn_output_diff.append(t1[-1][0] - t2[-1][0]) + attn_output_diff.append(t1[-1][1] - t2[-1][1]) max_diff_tensor = [torch.max(item).item() for item in attn_output_diff] print(max_diff_tensor) From c2fa88b43e8cfa49406f4454508734fd6852e175 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 24 Apr 2024 15:32:35 -0700 Subject: [PATCH 08/57] update --- .github/workflows/llm_unit_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 2e776a3d8e9..f228d9f45eb 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -362,6 +362,10 @@ jobs: echo "Directory $QWEN_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Qwen-7B-Chat -P $ORIGIN_DIR fi + if [ -d $BAICHUAN2_7B_ORIGIN_PATH ]; then + echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." + rm -rf $BAICHUAN2_7B_ORIGIN_PATH + fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/Baichuan2-7B-Chat -P $ORIGIN_DIR_436 From b255ac53353261ba395a57cca7faeb19c2156740 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 24 Apr 2024 16:13:06 -0700 Subject: [PATCH 09/57] update --- python/llm/test/inference_gpu/test_transformers_api_mlp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/llm/test/inference_gpu/test_transformers_api_mlp.py b/python/llm/test/inference_gpu/test_transformers_api_mlp.py index e3273ad574e..70ba2e7b9f6 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_mlp.py +++ b/python/llm/test/inference_gpu/test_transformers_api_mlp.py @@ -96,9 +96,14 @@ def replace_forward_hook(module, input, output, layer_name): for i, (t1, t2) in enumerate(zip(layer_tensor, opt_layer_tensor)): if isinstance(t1, torch.Tensor) and isinstance(t2, torch.Tensor): MLP_output_diff.append(t1 - t2) - else: + elif isinstance(t1, tuple) and isinstance(t2, tuple): + # if 'past_key_value'is of type tuple for i, (t3, t4) in enumerate(zip(t1, t2)): MLP_output_diff.append(t3 - t4) + else: + # if 'past_key_value'is of type Cache, get last layer cache pair (key, value) + MLP_output_diff.append(t1[-1][0] - t2[-1][0]) + MLP_output_diff.append(t1[-1][1] - t2[-1][1]) max_diff_tensor = [torch.max(item).item() for item in MLP_output_diff] print(max_diff_tensor) From a82199ae6db63fe3c5bc020350b18c83f7e777ec Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 24 Apr 2024 16:16:13 -0700 Subject: [PATCH 10/57] update --- .github/workflows/llm_unit_tests.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index f228d9f45eb..2e776a3d8e9 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -362,10 +362,6 @@ jobs: echo "Directory $QWEN_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Qwen-7B-Chat -P $ORIGIN_DIR fi - if [ -d $BAICHUAN2_7B_ORIGIN_PATH ]; then - echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." - rm -rf $BAICHUAN2_7B_ORIGIN_PATH - fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/Baichuan2-7B-Chat -P $ORIGIN_DIR_436 From 7e7d09c94869e0cd0af2c1bf1c90c592ccad28d0 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 25 Apr 2024 12:39:39 -0700 Subject: [PATCH 11/57] update --- python/llm/test/inference/test_transformers_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index 8b208bdd3c1..7a11dfe6e8a 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -141,7 +141,7 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): prompt = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun" @pytest.mark.parametrize("Model, Tokenizer, model_path, prompt", [ - (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), + # (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt), From 8f1c35571a2d64cd527c1330fe325d08c93e7043 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 25 Apr 2024 13:39:28 -0700 Subject: [PATCH 12/57] update --- .github/workflows/llm_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 2e776a3d8e9..815d523e9a6 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -389,7 +389,7 @@ jobs: shell: bash run: | python -m pip uninstall datasets -y - python -m pip install transformers==4.34.0 datasets peft==0.5.0 accelerate==0.23.0 + python -m pip install datasets peft==0.5.0 accelerate==0.23.0 python -m pip install bitsandbytes scipy # Specific oneapi position on arc ut test machines if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then From e0c4407cfd5b1f466e3a5fc533d7329d9357cb7a Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 25 Apr 2024 15:29:21 -0700 Subject: [PATCH 13/57] update --- .github/workflows/llm_unit_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 815d523e9a6..85f79492597 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -235,7 +235,7 @@ jobs: shell: bash run: | pip install llama-index-readers-file llama-index-vector-stores-postgres llama-index-embeddings-huggingface - # pip install transformers==4.36.0 + pip install transformers==4.36.2 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests.sh - name: Run sentence-transformers uninstallation @@ -429,7 +429,7 @@ jobs: pip install --pre --upgrade ipex-llm[xpu_2.0] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ source /home/arda/intel/oneapi/setvars.sh fi - # pip install transformers==4.36.0 + pip install transformers==4.36.2 pip install "pydantic>=2.0.0" bash python/llm/test/run-llm-llamaindex-tests-gpu.sh - name: Run sentence-transformers uninstallation From c51b7ea789517f8a6afb913b7234ca7d5d10dd4f Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 25 Apr 2024 17:17:53 -0700 Subject: [PATCH 14/57] update example test --- python/llm/dev/test/run-example-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/dev/test/run-example-tests.sh b/python/llm/dev/test/run-example-tests.sh index edf767c8719..f099091418a 100644 --- a/python/llm/dev/test/run-example-tests.sh +++ b/python/llm/dev/test/run-example-tests.sh @@ -68,7 +68,7 @@ fi export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/ if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR fi echo ">>> Testing ChatGLM2 transformers API" From a4427683e39b8c82e9dd6fe219e507d268186f82 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 14:52:19 -0700 Subject: [PATCH 15/57] replace replit code --- .github/workflows/llm_unit_tests.yml | 24 +++++----- .../test/inference/test_transformers_api.py | 48 +++++++++---------- .../test/langchain/test_transformers_api.py | 26 +++++----- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 85f79492597..663fcb11da7 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -103,7 +103,7 @@ jobs: echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV" echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloom-7b1" >> "$GITHUB_ENV" echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR_436}/chatglm2-6b" >> "$GITHUB_ENV" - echo "ORIGINAL_REPLIT_CODE_PATH=${ORIGIN_DIR}/replit-code-v1-3b" >> "$GITHUB_ENV" + echo "ORIGINAL_CODESHELL_7B_PATH=${ORIGIN_DIR}/CodeShell-7B-Chat" >> "$GITHUB_ENV" echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" @@ -164,10 +164,10 @@ jobs: echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436" wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 fi - if [ ! -d $ORIGINAL_REPLIT_CODE_PATH ]; then - echo "Directory $ORIGINAL_REPLIT_CODE_PATH not found. Downloading from FTP server..." - echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR" - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/replit-code-v1-3b -P $ORIGIN_DIR + if [ ! -d $ORIGINAL_CODESHELL_7B_PATH ]; then + echo "Directory $ORIGINAL_CODESHELL_7B_PATH not found. Downloading from FTP server..." + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/CodeShell-7B-Chat -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/CodeShell-7B-Chat -P $ORIGIN_DIR fi if [ ! -d $ORIGINAL_WHISPER_TINY_PATH ]; then echo "Directory $ORIGINAL_WHISPER_TINY_PATH not found. Downloading from FTP server..." @@ -224,13 +224,13 @@ jobs: run: | python -m pip install einops datasets librosa openai-whisper bash python/llm/test/run-llm-inference-tests.sh - # - name: Run LLM langchain test - # shell: bash - # run: | - # pip install -U langchain==0.0.184 - # pip install -U chromadb==0.3.25 - # pip install -U pandas==2.0.3 - # bash python/llm/test/run-llm-langchain-tests.sh + - name: Run LLM langchain test + shell: bash + run: | + pip install -U langchain==0.0.184 + pip install -U chromadb==0.3.25 + pip install -U pandas==2.0.3 + bash python/llm/test/run-llm-langchain-tests.sh - name: Run LLM llamaindex test shell: bash run: | diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index 7a11dfe6e8a..3fadd390aa3 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -35,7 +35,7 @@ def setUp(self): self.n_threads = 2 def test_transformers_auto_model_int4(self): - model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH') + model_path = os.environ.get('ORIGINAL_CODESHELL_7B_PATH') model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) input_str = "Tell me the capital of France.\n\n" @@ -49,26 +49,26 @@ def test_transformers_auto_model_int4(self): print('Prompt:', input_str) print('Output:', output_str) print(f'Inference time: {end-st} s') - res = 'Paris' in output_str + res = 'Paris' in output_str self.assertTrue(res) - # def test_transformers_auto_model_for_causal_lm_int4(self): - # model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') - # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) - # input_str = 'def hello():\n print("hello world")\n' - # model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) - # with torch.inference_mode(): - # - # st = time.time() - # input_ids = tokenizer.encode(input_str, return_tensors="pt") - # output = model.generate(input_ids, do_sample=False, max_new_tokens=32) - # output_str = tokenizer.decode(output[0], skip_special_tokens=True) - # end = time.time() - # print('Prompt:', input_str) - # print('Output:', output_str) - # print(f'Inference time: {end-st} s') - # res = '\nhello()' in output_str - # self.assertTrue(res) + def test_transformers_auto_model_for_causal_lm_int4(self): + model_path = os.environ.get('ORIGINAL_CODESHELL_7B_PATH') + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + input_str = 'def hello():\n print("hello world")\n' + model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) + with torch.inference_mode(): + + st = time.time() + input_ids = tokenizer.encode(input_str, return_tensors="pt") + output = model.generate(input_ids, do_sample=False, max_new_tokens=32) + output_str = tokenizer.decode(output[0], skip_special_tokens=True) + end = time.time() + print('Prompt:', input_str) + print('Output:', output_str) + print(f'Inference time: {end-st} s') + res = '\nhello()' in output_str + self.assertTrue(res) def test_transformers_auto_model_for_speech_seq2seq_int4(self): @@ -86,7 +86,7 @@ def test_transformers_auto_model_for_speech_seq2seq_int4(self): predicted_ids = model.generate(input_features) # decode token ids to text transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False) - end = time.time() + end = time.time() print('Output:', transcription) print(f'Inference time: {end-st} s') res = 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.' in transcription[0] @@ -108,7 +108,7 @@ def test_transformers_chatglm_for_causallm(self): print('Prompt:', input_str) print('Output:', output_str) print(f'Inference time: {end-st} s') - res = 'Paris' in output_str + res = 'Paris' in output_str self.assertTrue(res) @pytest.mark.parametrize('prompt, answer', [ @@ -124,7 +124,7 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): load_in_4bit=True, optimize_model=True, trust_remote_code=True) - + with tempfile.TemporaryDirectory() as tempdir: model.save_low_bit(tempdir) loaded_model = Model.load_low_bit(tempdir, @@ -144,10 +144,10 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): # (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), - # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_REPLIT_CODE_PATH'), prompt), + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_CODESHELL_7B_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) ]) - + def test_optimize_model(Model, Tokenizer, model_path, prompt): tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) input_ids = tokenizer.encode(prompt, return_tensors="pt") diff --git a/python/llm/test/langchain/test_transformers_api.py b/python/llm/test/langchain/test_transformers_api.py index 61d30051b6f..ad139c74dc6 100644 --- a/python/llm/test/langchain/test_transformers_api.py +++ b/python/llm/test/langchain/test_transformers_api.py @@ -38,7 +38,7 @@ class Test_Langchain_Transformers_API(TestCase): def setUp(self): self.auto_model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH') - # self.auto_causal_model_path = os.environ.get('ORIGINAL_REPLIT_CODE_PATH') + self.auto_causal_model_path = os.environ.get('ORIGINAL_CODESHELL_7B_PATH') self.llama_model_path = os.environ.get('LLAMA_ORIGIN_PATH') self.bloom_model_path = os.environ.get('BLOOM_ORIGIN_PATH') thread_num = os.environ.get('THREAD_NUM') @@ -79,12 +79,12 @@ def test_transformers_llama_embeddings(self): def test_qa_chain(self): texts = ''' - AI is a machine’s ability to perform the cognitive functions - we associate with human minds, such as perceiving, reasoning, + AI is a machine’s ability to perform the cognitive functions + we associate with human minds, such as perceiving, reasoning, learning, interacting with an environment, problem solving, - and even exercising creativity. You’ve probably interacted - with AI even if you didn’t realize it—voice assistants like Siri - and Alexa are founded on AI technology, as are some customer + and even exercising creativity. You’ve probably interacted + with AI even if you didn’t realize it—voice assistants like Siri + and Alexa are founded on AI technology, as are some customer service chatbots that pop up to help you navigate websites. ''' text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) @@ -102,16 +102,16 @@ def test_qa_chain(self): res = "AI" in output self.assertTrue(res) - + """ def test_qa_chain_causalLM(self): texts = ''' - AI is a machine’s ability to perform the cognitive functions - we associate with human minds, such as perceiving, reasoning, + AI is a machine’s ability to perform the cognitive functions + we associate with human minds, such as perceiving, reasoning, learning, interacting with an environment, problem solving, - and even exercising creativity. You’ve probably interacted - with AI even if you didn’t realize it—voice assistants like Siri - and Alexa are founded on AI technology, as are some customer + and even exercising creativity. You’ve probably interacted + with AI even if you didn’t realize it—voice assistants like Siri + and Alexa are founded on AI technology, as are some customer service chatbots that pop up to help you navigate websites. ''' text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) @@ -129,7 +129,7 @@ def test_qa_chain_causalLM(self): res = "AI" in output self.assertTrue(res) """ - + def test_embed_kwargs(self): embeddings = TransformersEmbeddings.from_model_id(model_id=self.llama_model_path) encode_kwargs = {"truncation": True, "max_length": 512} From 5563f288b68ae200bc563b807d5bc04170893208 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 15:05:07 -0700 Subject: [PATCH 16/57] update --- .github/workflows/llm_unit_tests.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 663fcb11da7..c77987ec783 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -224,13 +224,13 @@ jobs: run: | python -m pip install einops datasets librosa openai-whisper bash python/llm/test/run-llm-inference-tests.sh - - name: Run LLM langchain test - shell: bash - run: | - pip install -U langchain==0.0.184 - pip install -U chromadb==0.3.25 - pip install -U pandas==2.0.3 - bash python/llm/test/run-llm-langchain-tests.sh + - name: Run LLM langchain test + shell: bash + run: | + pip install -U langchain==0.0.184 + pip install -U chromadb==0.3.25 + pip install -U pandas==2.0.3 + bash python/llm/test/run-llm-langchain-tests.sh - name: Run LLM llamaindex test shell: bash run: | From b575c48734b4aeae4eb31d633d438635cb0179b5 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 15:13:19 -0700 Subject: [PATCH 17/57] update --- python/llm/dev/test/run-example-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/dev/test/run-example-tests.sh b/python/llm/dev/test/run-example-tests.sh index f099091418a..e5b3d125add 100644 --- a/python/llm/dev/test/run-example-tests.sh +++ b/python/llm/dev/test/run-example-tests.sh @@ -68,7 +68,7 @@ fi export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/ if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:5} -P $LLM_DIR fi echo ">>> Testing ChatGLM2 transformers API" From cc0ed3006786b0d33e39d1bbd97400313769798f Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 15:53:51 -0700 Subject: [PATCH 18/57] update --- python/llm/dev/test/run-example-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/dev/test/run-example-tests.sh b/python/llm/dev/test/run-example-tests.sh index e5b3d125add..3876fa77a68 100644 --- a/python/llm/dev/test/run-example-tests.sh +++ b/python/llm/dev/test/run-example-tests.sh @@ -68,7 +68,7 @@ fi export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/ if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:5} -P $LLM_DIR + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:6} -P $LLM_DIR fi echo ">>> Testing ChatGLM2 transformers API" From 04333ae142a328063af81cf3f95915c8a8109830 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 16:05:31 -0700 Subject: [PATCH 19/57] update --- python/llm/test/inference/test_transformers_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index 3fadd390aa3..ad1d1b53f3a 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -35,7 +35,7 @@ def setUp(self): self.n_threads = 2 def test_transformers_auto_model_int4(self): - model_path = os.environ.get('ORIGINAL_CODESHELL_7B_PATH') + model_path = os.environ.get('ORIGINAL_CHATGLM2_6B_PATH') model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True) tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) input_str = "Tell me the capital of France.\n\n" From 8ecdeac9e5891c04d282046c27f4fb0aa2e8aa16 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 26 Apr 2024 17:29:38 -0700 Subject: [PATCH 20/57] set safe_serialization false --- python/llm/src/ipex_llm/optimize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/optimize.py b/python/llm/src/ipex_llm/optimize.py index d69895ec2c2..86db591ca9b 100644 --- a/python/llm/src/ipex_llm/optimize.py +++ b/python/llm/src/ipex_llm/optimize.py @@ -47,7 +47,8 @@ def _save_low_bit(self, save_dir, *args, **kwargs): if isinstance(self, PreTrainedModel): # We borrowed this method to adapt to Transformer model cases # as much as possible, and later we may merge these two situations - self.save_pretrained(save_dir) + kwargs['safe_serialization'] = False + self.save_pretrained(save_dir, *args, **kwargs) else: # TODO: For the lowbit model still larger than 8GB, # save it into shards. From 49a6933d906e5973253bcdfb1b38c00b231798b7 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 30 Apr 2024 15:53:37 -0700 Subject: [PATCH 21/57] perf test --- .github/workflows/llm_performance_tests.yml | 466 +++++++++--------- .../llm/test/benchmark/arc-perf-test-436.yaml | 20 + python/llm/test/benchmark/arc-perf-test.yaml | 10 +- .../llm/test/benchmark/cpu-perf-test-436.yaml | 26 + python/llm/test/benchmark/cpu-perf-test.yaml | 8 +- .../test/benchmark/igpu-perf/1024-128.yaml | 1 + .../igpu-perf/1024-128_int4_fp16.yaml | 1 + .../igpu-perf/1024-128_loadlowbit.yaml | 1 + .../test/benchmark/igpu-perf/2048-256.yaml | 1 + .../llm/test/benchmark/igpu-perf/32-32.yaml | 1 + 10 files changed, 303 insertions(+), 232 deletions(-) create mode 100644 python/llm/test/benchmark/arc-perf-test-436.yaml create mode 100644 python/llm/test/benchmark/cpu-perf-test-436.yaml diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 48cc7dc763d..97f4f522609 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -11,25 +11,25 @@ permissions: # Controls when the action will run. on: schedule: - - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China + # - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - # pull_request: - # branches: [main] - # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + pull_request: + branches: [main] + paths: + - ".github/workflows/llm_performance_tests.yml" + - "python/llm/test/benchmark/**" + - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # llm-cpp-build: # please uncomment it for PR tests - # uses: ./.github/workflows/llm-binary-build.yml + llm-cpp-build: # please uncomment it for PR tests + uses: ./.github/workflows/llm-binary-build.yml llm-performance-test-on-arc: - if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -63,23 +63,23 @@ jobs: python -m pip install --upgrade tiktoken # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - # with: - # extra-dependency: "xpu_2.1" + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary - - name: Install IPEX-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - if ! pip show ipex-llm | grep $test_version_date; then - echo "Did not install ipex-llm with excepted version $test_version_date" - exit 1 - fi + - name: Run LLM install (all) test + uses: ./.github/actions/llm/setup-llm-env + with: + extra-dependency: "xpu_2.1" + + #- name: Install IPEX-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show ipex-llm | grep $test_version_date; then + # echo "Did not install ipex-llm with excepted version $test_version_date" + # exit 1 + # fi - name: Test installed xpu version shell: bash @@ -87,12 +87,11 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - - name: Test on xpu(transformers==4.31.0) + - name: Test on xpu(transformers==4.36.2) shell: bash run: | date_for_test_version=$(date -d yesterday +%Y-%m-%d) sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - source /opt/intel/oneapi/setvars.sh export USE_XETLA=OFF export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 @@ -103,21 +102,28 @@ jobs: # change csv name sed -i 's/{today}/{today}_test1/g' run.py python run.py - - - name: Test on xpu(transformers==4.34.0) - shell: bash - run: | - source /opt/intel/oneapi/setvars.sh - export USE_XETLA=OFF - export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # upgrade transformers for model Mistral-7B-v0.1 - python -m pip install transformers==4.34.0 - cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # run updated models for 4.36 + cd - + cp python/llm/test/benchmark/arc-perf-test-436.yaml python/llm/dev/benchmark/all-in-one/config.yaml cd python/llm/dev/benchmark/all-in-one # change csv name sed -i 's/test1/test2/g' run.py python run.py + #- name: Test on xpu(transformers==4.34.0) + # shell: bash + # run: | + # source /opt/intel/oneapi/setvars.sh + # export USE_XETLA=OFF + # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + # upgrade transformers for model Mistral-7B-v0.1 + # python -m pip install transformers==4.34.0 + # cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml + # cd python/llm/dev/benchmark/all-in-one + # change csv name + # sed -i 's/test1/test2/g' run.py + # python run.py + - name: Test on xpu(transformers==4.37.0) shell: bash run: | @@ -151,15 +157,17 @@ jobs: run: | cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml - python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml + python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-test-436.yaml + python ../../../test/benchmark/check_results.py -c test3 -y ../../../test/benchmark/arc-perf-test-transformers-437.yaml + # python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml find . -name "*test*.csv" -delete if [ ${{ github.event.schedule}} ]; then curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ fi llm-performance-test-on-spr: - if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -189,21 +197,21 @@ jobs: python -m pip install --upgrade transformers_stream_generator # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - - - name: Install IPEX-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[all] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - if ! pip show ipex-llm | grep $test_version_date; then - echo "Did not install ipex-llm with excepted version $test_version_date" - exit 1 - fi + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary + + - name: Run LLM install (all) test + uses: ./.github/actions/llm/setup-llm-env + + # - name: Install IPEX-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[all] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show ipex-llm | grep $test_version_date; then + # echo "Did not install ipex-llm with excepted version $test_version_date" + # exit 1 + # fi - name: Test on cpu shell: bash @@ -219,7 +227,18 @@ jobs: export OMP_NUM_THREADS=48 # hide time info sed -i 's/str(end - st)/"xxxxxx"/g' run.py + # change csv name + sed -i 's/{today}/{today}_test1/g' run.py + python run.py + # run updated models for 4.36 + cd - + cp python/llm/test/benchmark/cpu-perf-test-436.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one + # change csv name + sed -i 's/test1/test2/g' run.py python run.py + python ../../../test/benchmark/concat_csv.py + find . -name "*test*.csv" -delete cp ./*.csv /mnt/disk1/models/nightly_perf_cpu cd ../../../test/benchmark python -m pip install pandas==1.5.3 @@ -230,8 +249,8 @@ jobs: done llm-performance-test-on-core: - if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - # needs: llm-cpp-build # please uncomment it for PR tests + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -263,21 +282,21 @@ jobs: python -m pip install --upgrade tiktoken einops transformers_stream_generator # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - - - name: Install IPEX-LLM from Pypi - shell: bash - run: | - pip install --pre --upgrade ipex-llm[all] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - if ! pip show ipex-llm | grep $test_version_date; then - echo "Did not install ipex-llm with excepted version $test_version_date" - exit 1 - fi + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary + + - name: Run LLM install (all) test + uses: ./.github/actions/llm/setup-llm-env + + # - name: Install IPEX-LLM from Pypi + # shell: bash + # run: | + # pip install --pre --upgrade ipex-llm[all] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # if ! pip show ipex-llm | grep $test_version_date; then + # echo "Did not install ipex-llm with excepted version $test_version_date" + # exit 1 + # fi - name: Test on core ${{ matrix.platform }} shell: bash @@ -302,7 +321,7 @@ jobs: fi llm-performance-test-on-igpu: - if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false @@ -319,44 +338,16 @@ jobs: # TODO: Put the ipex-llm related install process for win gpu into a action function # Please uncomment it and commment the install from pypi for PR tests - # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - # - name: Prepare for install ipex-llm from source - # shell: bash - # run: | - # sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py - # sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - - # - name: Install ipex-llm and other related packages (install from source) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf - - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator - - # cd python\llm - # python setup.py clean --all bdist_wheel --win - # if not exist dist\ipex_llm*.whl (exit /b 1) - # for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary - # pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # pip list - - # call conda deactivate - - - name: Determine desired ipex-llm version + - name: Prepare for install ipex-llm from source shell: bash run: | - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - - name: Install ipex-llm and other related packages (install from pypi) + - name: Install ipex-llm and other related packages (install from source) shell: cmd run: | call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y @@ -367,16 +358,45 @@ jobs: pip install --upgrade omegaconf pandas pip install --upgrade tiktoken einops transformers_stream_generator - pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - pip show ipex-llm | findstr %TEST_VERSION_DATE% - if %ERRORLEVEL% neq 0 ( - echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" - exit /b 1 - ) + cd python\llm + python setup.py clean --all bdist_wheel --win + if not exist dist\ipex_llm*.whl (exit /b 1) + for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i + + pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + if %ERRORLEVEL% neq 0 (exit /b 1) + pip install transformers==4.36.2 pip list call conda deactivate + #- name: Determine desired ipex-llm version + # shell: bash + # run: | + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + + #- name: Install ipex-llm and other related packages (install from pypi) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # call conda activate igpu-perf + + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator + + # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # pip show ipex-llm | findstr %TEST_VERSION_DATE% + # if %ERRORLEVEL% neq 0 ( + # echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" + # exit /b 1 + # ) + # pip list + + # call conda deactivate + - name: Create env for html generation shell: cmd run: | @@ -427,34 +447,34 @@ jobs: call conda deactivate - - name: Prepare igpu perf test for Mistral (32-32) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml + #- name: Prepare igpu perf test for Mistral (32-32) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml - - name: Test on igpu for Mistral (32-32) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + #- name: Test on igpu for Mistral (32-32) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - name: Prepare igpu perf test for Qwen1.5 (32-32) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_437.yaml - name: Test on igpu for Qwen1.5 (32-32) @@ -498,14 +518,14 @@ jobs: shell: bash run: | sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test2/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml - name: Test on igpu (1024-128) shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.31.0 + pip install transformers==4.36.2 call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" set SYCL_CACHE_PERSISTENT=1 @@ -521,34 +541,34 @@ jobs: call conda deactivate - - name: Prepare igpu perf test for Mistral (1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml + #- name: Prepare igpu perf test for Mistral (1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - - name: Test on igpu for Mistral (1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + #- name: Test on igpu for Mistral (1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - name: Prepare igpu perf test for Qwen 1.5 (1024-128) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_437.yaml - name: Test on igpu for Qwen 1.5 (1024-128) @@ -591,14 +611,14 @@ jobs: shell: bash run: | sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test2/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256.yaml - name: Test on igpu (2048-256) shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.31.0 + pip install transformers==4.36.2 call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" set SYCL_CACHE_PERSISTENT=1 @@ -614,34 +634,34 @@ jobs: call conda deactivate - - name: Prepare igpu perf test for Mistral (2048-256) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml + #- name: Prepare igpu perf test for Mistral (2048-256) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - - name: Test on igpu for Mistral (2048-256) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + #- name: Test on igpu for Mistral (2048-256) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - name: Prepare igpu perf test for Qwen 1.5 (2048-256) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_437.yaml - name: Test on igpu for Qwen 1.5 (2048-256) @@ -684,14 +704,14 @@ jobs: shell: bash run: | sed -i 's/2048-256/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test2/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml - name: Test on igpu (load_low_bit 1024-128) shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.31.0 + pip install transformers==4.36.2 call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" set SYCL_CACHE_PERSISTENT=1 @@ -707,34 +727,34 @@ jobs: call conda deactivate - - name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml + #- name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - - name: Test on igpu for Mistral (load_low_bit 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + #- name: Test on igpu for Mistral (load_low_bit 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_437.yaml - name: Test on igpu for Qwen 1.5 (load_low_bit 1024-128) @@ -775,14 +795,14 @@ jobs: - name: Prepare igpu perf test (int4+fp16 1024-128) shell: bash run: | - sed -i 's/{today}_test3/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test2/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml - name: Test on igpu (int4+fp16 1024-128) shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.31.0 + pip install transformers==4.36.2 call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" set SYCL_CACHE_PERSISTENT=1 @@ -798,34 +818,34 @@ jobs: call conda deactivate - - name: Prepare igpu perf test for Mistral (int4+fp16 1024-128) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml + #- name: Prepare igpu perf test for Mistral (int4+fp16 1024-128) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml - - name: Test on igpu for Mistral (int4+fp16 1024-128) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.34.0 + #- name: Test on igpu for Mistral (int4+fp16 1024-128) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.34.0 - call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_434.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_434.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - name: Prepare igpu perf test for Qwen 1.5 (int4+fp16 1024-128) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_437.yaml - name: Test on igpu for Qwen 1.5 (int4+fp16 1024-128) diff --git a/python/llm/test/benchmark/arc-perf-test-436.yaml b/python/llm/test/benchmark/arc-perf-test-436.yaml new file mode 100644 index 00000000000..409834daa4f --- /dev/null +++ b/python/llm/test/benchmark/arc-perf-test-436.yaml @@ -0,0 +1,20 @@ +repo_id: + - 'THUDM/chatglm2-6b_for_4.36' + - 'mosaicml/mpt-7b-chat_for_4.36' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit' +local_model_hub: '/mnt/disk0/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) +exclude: + - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 47f74b20e7e..07f0633fcd9 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -1,10 +1,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm2-6b' - 'THUDM/chatglm3-6b-4bit' - 'tiiuae/falcon-7b-instruct-with-patch' - - 'mosaicml/mpt-7b-chat' +# - 'mosaicml/mpt-7b-chat' - 'redpajama/gptneox-7b-redpajama-bf16' - 'bigcode/starcoder-15.5b-4bit' - 'databricks/dolly-v1-6b' @@ -13,10 +13,11 @@ repo_id: - 'internlm/internlm-chat-7b-8k' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat-7B' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit' +# - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'bigscience/bloomz-7b1' - 'fnlp/moss-moon-003-sft-4bit' + - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 @@ -33,5 +34,4 @@ cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu w exclude: - 'fnlp/moss-moon-003-sft-4bit:1024' - 'fnlp/moss-moon-003-sft-4bit:2048' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - 'bigscience/bloomz-7b1:2048' diff --git a/python/llm/test/benchmark/cpu-perf-test-436.yaml b/python/llm/test/benchmark/cpu-perf-test-436.yaml new file mode 100644 index 00000000000..c7ba0c1ded8 --- /dev/null +++ b/python/llm/test/benchmark/cpu-perf-test-436.yaml @@ -0,0 +1,26 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat' +local_model_hub: '/mnt/disk1/models/updated_for_4.36' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4" + # - "native_int4" + # - "optimize_model" + # - "pytorch_autocast_bf16" + # - "ipex_fp16_gpu" # on Intel GPU + # - "transformer_int4_gpu" # on Intel GPU + # - "optimize_model_gpu" # on Intel GPU + # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server + # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml index 92b12750dbb..c65e3f7618e 100644 --- a/python/llm/test/benchmark/cpu-perf-test.yaml +++ b/python/llm/test/benchmark/cpu-perf-test.yaml @@ -1,10 +1,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b_for_4.36' +# - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index 0bc604795b5..b7975c1d0d1 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -16,6 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 0ac8951657c..73eced1e400 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -16,6 +16,7 @@ repo_id: # - 'RWKV/rwkv-4-world-7b' # - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml index a8dd75eac73..f3e72dd3529 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml @@ -16,6 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index 5aa790150e6..4edfcf581e4 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -16,6 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index 6f4fd2f0b49..dc6ce9bc6bc 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -16,6 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 3 num_trials: 5 From 92176620f618357c9d8afcf09d8ad19d6f93046f Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 30 Apr 2024 16:10:52 -0700 Subject: [PATCH 22/57] update --- .github/workflows/llm_performance_tests.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 248380249c8..1cd670b0383 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -11,14 +11,14 @@ permissions: # Controls when the action will run. on: schedule: - # - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China + - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - pull_request: - branches: [main] - paths: - - ".github/workflows/llm_performance_tests.yml" - - "python/llm/test/benchmark/**" - - "python/llm/dev/benchmark/all-in-one/**" + pull_request: + branches: [main] + paths: + - ".github/workflows/llm_performance_tests.yml" + - "python/llm/test/benchmark/**" + - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: From 3ad25b76917c96d3fc88b89dba40ef4cd1c35ac3 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 30 Apr 2024 17:22:58 -0700 Subject: [PATCH 23/57] update --- python/llm/test/benchmark/arc-perf-test-436.yaml | 1 + python/llm/test/benchmark/arc-perf-test.yaml | 2 +- python/llm/test/benchmark/core-perf-test.yaml | 2 +- python/llm/test/benchmark/cpu-perf-test.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml | 2 +- python/llm/test/benchmark/igpu-perf/2048-256.yaml | 2 +- python/llm/test/benchmark/igpu-perf/32-32.yaml | 2 +- 9 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/llm/test/benchmark/arc-perf-test-436.yaml b/python/llm/test/benchmark/arc-perf-test-436.yaml index 409834daa4f..bebc7a181ac 100644 --- a/python/llm/test/benchmark/arc-perf-test-436.yaml +++ b/python/llm/test/benchmark/arc-perf-test-436.yaml @@ -3,6 +3,7 @@ repo_id: - 'mosaicml/mpt-7b-chat_for_4.36' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + - 'internlm/internlm-chat-7b' local_model_hub: '/mnt/disk0/models' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 07f0633fcd9..c16a8c014f5 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -10,7 +10,7 @@ repo_id: - 'databricks/dolly-v1-6b' - 'databricks/dolly-v2-7b' - 'databricks/dolly-v2-12b' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat-7B' # - 'baichuan-inc/Baichuan2-7B-Chat' diff --git a/python/llm/test/benchmark/core-perf-test.yaml b/python/llm/test/benchmark/core-perf-test.yaml index e922d21886e..049a807bd76 100644 --- a/python/llm/test/benchmark/core-perf-test.yaml +++ b/python/llm/test/benchmark/core-perf-test.yaml @@ -2,7 +2,7 @@ repo_id: - 'THUDM/chatglm2-6b' - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' - 'meta-llama/Llama-2-7b-chat-hf' diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml index c65e3f7618e..6095e2ed805 100644 --- a/python/llm/test/benchmark/cpu-perf-test.yaml +++ b/python/llm/test/benchmark/cpu-perf-test.yaml @@ -2,7 +2,7 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' # - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b_for_4.36' +# - 'THUDM/chatglm3-6b' # - 'baichuan-inc/Baichuan2-7B-Chat' # - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index b7975c1d0d1..0b53b28a1f5 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -3,7 +3,7 @@ repo_id: - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' # - '01-ai/Yi-6B' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 73eced1e400..85a0c2fb038 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -3,7 +3,7 @@ repo_id: - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' # - '01-ai/Yi-6B' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml index f3e72dd3529..f941fcd8a5f 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml @@ -3,7 +3,7 @@ repo_id: - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' # - '01-ai/Yi-6B' diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index 4edfcf581e4..3b03fd554cf 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -3,7 +3,7 @@ repo_id: - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' # - '01-ai/Yi-6B' diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index dc6ce9bc6bc..84ad699369c 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -3,7 +3,7 @@ repo_id: - 'THUDM/chatglm3-6b' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat' - - 'internlm/internlm-chat-7b-8k' +# - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat2-7B' # - '01-ai/Yi-6B' From 8ee92d2e0319f181bb506009c4f68a2deda11351 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 30 Apr 2024 23:27:02 -0700 Subject: [PATCH 24/57] update --- .github/workflows/llm_performance_tests.yml | 4 ++-- python/llm/test/benchmark/arc-perf-test.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 1cd670b0383..bb5678931d9 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -344,8 +344,8 @@ jobs: - name: Prepare for install ipex-llm from source shell: bash run: | - sed -i 's/"bigdl-core-xe-21==" + VERSION + "/"bigdl-core-xe-21/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-21==" + VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py - name: Install ipex-llm and other related packages (install from source) shell: cmd diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index c16a8c014f5..7552b7f84f1 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -16,7 +16,7 @@ repo_id: # - 'baichuan-inc/Baichuan2-7B-Chat' # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'bigscience/bloomz-7b1' - - 'fnlp/moss-moon-003-sft-4bit' +# - 'fnlp/moss-moon-003-sft-4bit' - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 @@ -32,6 +32,6 @@ test_api: - "transformer_int4_gpu" # on Intel GPU cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: - - 'fnlp/moss-moon-003-sft-4bit:1024' - - 'fnlp/moss-moon-003-sft-4bit:2048' +# - 'fnlp/moss-moon-003-sft-4bit:1024' +# - 'fnlp/moss-moon-003-sft-4bit:2048' - 'bigscience/bloomz-7b1:2048' From 45d23834dcf5438de753390c85ecb16cb920941d Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 30 Apr 2024 23:52:56 -0700 Subject: [PATCH 25/57] update --- .github/workflows/llm_performance_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index bb5678931d9..d27f318786e 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -344,7 +344,7 @@ jobs: - name: Prepare for install ipex-llm from source shell: bash run: | - sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py - name: Install ipex-llm and other related packages (install from source) From e968252c3c58d13450d5987a3ad6c5b4a1fd045a Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 1 May 2024 10:24:27 -0700 Subject: [PATCH 26/57] update --- python/llm/test/benchmark/arc-perf-test-436.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/llm/test/benchmark/arc-perf-test-436.yaml b/python/llm/test/benchmark/arc-perf-test-436.yaml index bebc7a181ac..1b31cfa2aad 100644 --- a/python/llm/test/benchmark/arc-perf-test-436.yaml +++ b/python/llm/test/benchmark/arc-perf-test-436.yaml @@ -1,6 +1,6 @@ repo_id: - - 'THUDM/chatglm2-6b_for_4.36' - - 'mosaicml/mpt-7b-chat_for_4.36' + - 'THUDM/chatglm2-6b' + - 'mosaicml/mpt-7b-chat' - 'baichuan-inc/Baichuan2-7B-Chat' - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'internlm/internlm-chat-7b' From d59f68c0480f45bc16872c91bf529a89498ab01d Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 1 May 2024 15:37:35 -0700 Subject: [PATCH 27/57] update --- python/llm/test/benchmark/arc-perf-test-436.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/llm/test/benchmark/arc-perf-test-436.yaml b/python/llm/test/benchmark/arc-perf-test-436.yaml index 1b31cfa2aad..f619987bfc9 100644 --- a/python/llm/test/benchmark/arc-perf-test-436.yaml +++ b/python/llm/test/benchmark/arc-perf-test-436.yaml @@ -19,3 +19,4 @@ test_api: cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' + - 'mosaicml/mpt-7b-chat:2048' From f44e9a4aa1de8ceb92a344e0d46cdf1a53bc0a84 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 1 May 2024 16:33:02 -0700 Subject: [PATCH 28/57] update --- .github/workflows/llm_performance_tests.yml | 83 +++++++++++---------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index d27f318786e..337f8361b28 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -158,7 +158,7 @@ jobs: cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-test-436.yaml - python ../../../test/benchmark/check_results.py -c test3 -y ../../../test/benchmark/arc-perf-test-transformers-437.yaml + python ../../../test/benchmark/check_results.py -c test3 -y ../../../test/benchmark/arc-perf-transformers-437.yaml # python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml find . -name "*test*.csv" -delete if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then @@ -338,16 +338,45 @@ jobs: # TODO: Put the ipex-llm related install process for win gpu into a action function # Please uncomment it and commment the install from pypi for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary + #- name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + #- name: Prepare for install ipex-llm from source + # shell: bash + # run: | + # sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + # sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py + + #- name: Install ipex-llm and other related packages (install from source) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # call conda activate igpu-perf + + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator + + # cd python\llm + # python setup.py clean --all bdist_wheel --win + # if not exist dist\ipex_llm*.whl (exit /b 1) + # for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i - - name: Prepare for install ipex-llm from source + # pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # pip install transformers==4.36.2 + # pip list + + # call conda deactivate + + - name: Determine desired ipex-llm version shell: bash run: | - sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - - name: Install ipex-llm and other related packages (install from source) + - name: Install ipex-llm and other related packages (install from pypi) shell: cmd run: | call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y @@ -358,45 +387,17 @@ jobs: pip install --upgrade omegaconf pandas pip install --upgrade tiktoken einops transformers_stream_generator - cd python\llm - python setup.py clean --all bdist_wheel --win - if not exist dist\ipex_llm*.whl (exit /b 1) - for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i - - pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - if %ERRORLEVEL% neq 0 (exit /b 1) + pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + pip show ipex-llm | findstr %TEST_VERSION_DATE% + if %ERRORLEVEL% neq 0 ( + echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" + exit /b 1 + ) pip install transformers==4.36.2 pip list call conda deactivate - #- name: Determine desired ipex-llm version - # shell: bash - # run: | - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - - #- name: Install ipex-llm and other related packages (install from pypi) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf - - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator - - # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # pip show ipex-llm | findstr %TEST_VERSION_DATE% - # if %ERRORLEVEL% neq 0 ( - # echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" - # exit /b 1 - # ) - # pip list - - # call conda deactivate - - name: Create env for html generation shell: cmd run: | From f9ece00d1cef91cc29f05e9454fd8e412e1a63c3 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 1 May 2024 16:35:59 -0700 Subject: [PATCH 29/57] update --- .github/workflows/llm_performance_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 337f8361b28..b687c3a2ffc 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -443,6 +443,7 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml set PYTHONIOENCODING=utf-8 + python run.py python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) From bf8aecea62a26164e198f3752ab4b34236bf1a55 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 1 May 2024 17:42:26 -0700 Subject: [PATCH 30/57] update --- python/llm/dev/benchmark/all-in-one/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 31c3ecee8b2..5eab763187c 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -844,7 +844,7 @@ def run_transformer_int4_gpu_win(repo_id, elif repo_id in LLAVA_IDS: llava_repo_dir = os.environ.get('LLAVA_REPO_DIR') sys.path.append(rf"{llava_repo_dir}") - from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM + # from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True, trust_remote_code=True, use_cache=True, cpu_embedding=cpu_embedding).eval() tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) From 98789dbbed5a31e62b5a144193b3562d383cced8 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 2 May 2024 12:57:26 -0700 Subject: [PATCH 31/57] update --- python/llm/dev/benchmark/all-in-one/run.py | 2 +- python/llm/test/benchmark/igpu-perf/1024-128.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml | 2 +- python/llm/test/benchmark/igpu-perf/2048-256.yaml | 2 +- python/llm/test/benchmark/igpu-perf/32-32.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 5eab763187c..31c3ecee8b2 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -844,7 +844,7 @@ def run_transformer_int4_gpu_win(repo_id, elif repo_id in LLAVA_IDS: llava_repo_dir = os.environ.get('LLAVA_REPO_DIR') sys.path.append(rf"{llava_repo_dir}") - # from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM + from llava.model.language_model.llava_llama import LlavaLlamaForCausalLM model = AutoModelForCausalLM.from_pretrained(model_path, load_in_low_bit=low_bit, optimize_model=True, trust_remote_code=True, use_cache=True, cpu_embedding=cpu_embedding).eval() tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index 0b53b28a1f5..582d55e26fd 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -12,7 +12,7 @@ repo_id: - 'WisdomShell/CodeShell-7B-Chat' - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - - 'liuhaotian/llava-v1.5-7b' +# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+ - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 85a0c2fb038..1208b1b6e63 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -12,7 +12,7 @@ repo_id: - 'WisdomShell/CodeShell-7B-Chat' - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - - 'liuhaotian/llava-v1.5-7b' +# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+ # - 'RWKV/rwkv-4-world-7b' # - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml index f941fcd8a5f..5157b56dadc 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml @@ -12,7 +12,7 @@ repo_id: - 'WisdomShell/CodeShell-7B-Chat' - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - - 'liuhaotian/llava-v1.5-7b' +# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+ - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index 3b03fd554cf..d4872ad19cc 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -12,7 +12,7 @@ repo_id: - 'WisdomShell/CodeShell-7B-Chat' - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - - 'liuhaotian/llava-v1.5-7b' +# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+ - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index 84ad699369c..53f09c910d5 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -12,7 +12,7 @@ repo_id: - 'WisdomShell/CodeShell-7B-Chat' - 'tiiuae/falcon-7b-instruct-with-patch' - 'mosaicml/mpt-7b-chat' - - 'liuhaotian/llava-v1.5-7b' +# - 'liuhaotian/llava-v1.5-7b' # Cannot load using AutoModelForCausalLM in 4.36+ - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' From d459a8297b479973db68fb2602f77733b9d1db02 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 2 May 2024 16:09:16 -0700 Subject: [PATCH 32/57] update --- .github/workflows/llm_performance_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index b687c3a2ffc..337f8361b28 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -443,7 +443,6 @@ jobs: cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32.yaml config.yaml set PYTHONIOENCODING=utf-8 - python run.py python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) From 51134d4511287c32d9a0349f2c3e3d18a0edaa02 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 3 May 2024 11:35:01 -0700 Subject: [PATCH 33/57] update --- .github/workflows/llm-harness-evaluation.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 544170195ff..29146ca116e 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -164,11 +164,11 @@ jobs: shell: bash run: | pip install --upgrade datasets==2.14.6 - if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then - pip install --upgrade transformers==4.36 - else - pip install --upgrade transformers==4.31 - fi + #if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then + # pip install --upgrade transformers==4.36 + #else + # pip install --upgrade transformers==4.31 + #fi - name: Run harness From 39c104b6cca29cc28a01c0fe4df7e50ca865d37d Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 3 May 2024 14:29:54 -0700 Subject: [PATCH 34/57] update --- .github/actions/llm/setup-llm-env/action.yml | 2 +- .github/workflows/llm-ppl-evaluation.yml | 10 +++++----- python/llm/test/benchmark/arc-perf-test.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128.yaml | 2 +- .../test/benchmark/igpu-perf/1024-128_int4_fp16.yaml | 2 +- .../test/benchmark/igpu-perf/1024-128_loadlowbit.yaml | 2 +- python/llm/test/benchmark/igpu-perf/2048-256.yaml | 2 +- python/llm/test/benchmark/igpu-perf/32-32.yaml | 2 +- python/llm/test/inference/test_transformers_api.py | 4 ++-- .../inference_gpu/test_transformers_api_RMSNorm.py | 2 +- .../inference_gpu/test_transformers_api_attention.py | 2 +- .../test/inference_gpu/test_transformers_api_mlp.py | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/actions/llm/setup-llm-env/action.yml b/.github/actions/llm/setup-llm-env/action.yml index 4d0b7550f74..01c2660ea8d 100644 --- a/.github/actions/llm/setup-llm-env/action.yml +++ b/.github/actions/llm/setup-llm-env/action.yml @@ -42,4 +42,4 @@ runs: pip install pytest bash python/llm/test/run-llm-install-tests.sh fi - pip install transformers==4.36.2 + # pip install transformers==4.36.2 diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index 7ad621f91e3..bfab5277f80 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -149,11 +149,11 @@ jobs: shell: bash run: | pip install --upgrade datasets==2.14.6 - if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then - pip install --upgrade transformers==4.36 - else - pip install --upgrade transformers==4.31 - fi + #if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then + # pip install --upgrade transformers==4.36 + #else + # pip install --upgrade transformers==4.31 + #fi - name: Run perplexity shell: bash diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 7552b7f84f1..b08cc5290f6 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -17,7 +17,7 @@ repo_id: # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' - - 'mistralai/Mistral-7B-v0.1' +# - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index 582d55e26fd..db7ae48114c 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' - - 'mistralai/Mistral-7B-Instruct-v0.1' +# - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 1208b1b6e63..6554c68bd80 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -16,7 +16,7 @@ repo_id: # - 'RWKV/rwkv-4-world-7b' # - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' - - 'mistralai/Mistral-7B-Instruct-v0.1' +# - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml index 5157b56dadc..231d4d780d5 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' - - 'mistralai/Mistral-7B-Instruct-v0.1' +# - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index d4872ad19cc..c8ff077f919 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' - - 'mistralai/Mistral-7B-Instruct-v0.1' +# - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index 53f09c910d5..f24141206c3 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' - - 'mistralai/Mistral-7B-Instruct-v0.1' +# - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 3 num_trials: 5 diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index ad1d1b53f3a..db7871fc801 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -116,7 +116,7 @@ def test_transformers_chatglm_for_causallm(self): ]) @pytest.mark.parametrize('Model, Tokenizer, model_path',[ (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')), - (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')), + #(AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')), ]) def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) @@ -145,7 +145,7 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_CODESHELL_7B_PATH'), prompt), - (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) + # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) ]) def test_optimize_model(Model, Tokenizer, model_path, prompt): diff --git a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py index f45f017ef0b..5cec634be44 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py +++ b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py @@ -30,7 +30,7 @@ TEST_MODEL_LIST = [ ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')), ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')), - ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')), ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), ] diff --git a/python/llm/test/inference_gpu/test_transformers_api_attention.py b/python/llm/test/inference_gpu/test_transformers_api_attention.py index 83f7aaebfc8..e5c39897cec 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_attention.py +++ b/python/llm/test/inference_gpu/test_transformers_api_attention.py @@ -32,7 +32,7 @@ ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')), ("Falcon-7B", AutoModelForCausalLM, AutoTokenizer, os.environ.get('FALCON_7B_ORIGIN_PATH')), ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')), - ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')), ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), ] diff --git a/python/llm/test/inference_gpu/test_transformers_api_mlp.py b/python/llm/test/inference_gpu/test_transformers_api_mlp.py index 70ba2e7b9f6..f7aba58fb72 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_mlp.py +++ b/python/llm/test/inference_gpu/test_transformers_api_mlp.py @@ -28,7 +28,7 @@ PROMPT = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun" TEST_MODEL_LIST = [ ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), - ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')) ] From 5d32b59fa35805be103a76b88740cf5bf423c582 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 3 May 2024 15:35:55 -0700 Subject: [PATCH 35/57] update --- .github/workflows/llm_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index c77987ec783..ffcfa925359 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -389,7 +389,7 @@ jobs: shell: bash run: | python -m pip uninstall datasets -y - python -m pip install datasets peft==0.5.0 accelerate==0.23.0 + python -m pip install transformers==4.34.0 datasets peft==0.5.0 accelerate==0.23.0 python -m pip install bitsandbytes scipy # Specific oneapi position on arc ut test machines if [[ '${{ matrix.pytorch-version }}' == '2.1' ]]; then From da72111f5dda6b4f39bc75de91e4ce66215cc998 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 3 May 2024 23:11:18 -0700 Subject: [PATCH 36/57] update --- .github/workflows/llm_unit_tests.yml | 32 +++++++----------------- python/llm/dev/test/run-example-tests.sh | 2 +- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index ffcfa925359..abf86908709 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -77,7 +77,6 @@ jobs: run: | echo "DATASET_DIR=${{ github.workspace }}/../llm/datasets" >> "$GITHUB_ENV" echo "ORIGIN_DIR=${{ github.workspace }}/../llm/origin-models" >> "$GITHUB_ENV" - echo "ORIGIN_DIR_436=${{ github.workspace }}/../llm/origin-models-4.36" >> "$GITHUB_ENV" echo "INT4_CKPT_DIR=${{ github.workspace }}/../llm/converted-models" >> "$GITHUB_ENV" - name: Create model directories shell: bash @@ -88,9 +87,6 @@ jobs: if [ ! -d $ORIGIN_DIR ]; then mkdir -p $ORIGIN_DIR fi - if [ ! -d ORIGIN_DIR_436 ]; then - mkdir -p ORIGIN_DIR_436 - fi if [ ! -d $INT4_CKPT_DIR ]; then mkdir -p $INT4_CKPT_DIR fi @@ -102,7 +98,7 @@ jobs: echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/llama-7b-hf" >> "$GITHUB_ENV" echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloom-7b1" >> "$GITHUB_ENV" - echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR_436}/chatglm2-6b" >> "$GITHUB_ENV" + echo "ORIGINAL_CHATGLM2_6B_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV" echo "ORIGINAL_CODESHELL_7B_PATH=${ORIGIN_DIR}/CodeShell-7B-Chat" >> "$GITHUB_ENV" echo "ORIGINAL_WHISPER_TINY_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-v0.1" >> "$GITHUB_ENV" @@ -161,8 +157,8 @@ jobs: # fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." - echo "wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436" - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 + echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR" + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR fi if [ ! -d $ORIGINAL_CODESHELL_7B_PATH ]; then echo "Directory $ORIGINAL_CODESHELL_7B_PATH not found. Downloading from FTP server..." @@ -256,16 +252,6 @@ jobs: # THREAD_NUM: 16 ANALYTICS_ZOO_ROOT: ${{ github.workspace }} steps: - - name: Set model directories for 4.36 - shell: bash - run: | - echo "ORIGIN_DIR_436=${{ github.workspace }}/../llm/origin-models-4.36" >> "$GITHUB_ENV" - - name: Create model directories - shell: bash - run: | - if [ ! -d ORIGIN_DIR_436 ]; then - mkdir -p ORIGIN_DIR_436 - fi - name: Set environment variables shell: bash run: | @@ -274,12 +260,12 @@ jobs: echo "SPEECH_DATASET_PATH=${ORIGIN_DIR}/../datasets/librispeech_asr_dummy" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" - echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR_436}/chatglm2-6b" >> "$GITHUB_ENV" + echo "CHATGLM2_6B_ORIGIN_PATH=${ORIGIN_DIR}/chatglm2-6b" >> "$GITHUB_ENV" echo "FALCON_7B_ORIGIN_PATH=${ORIGIN_DIR}/falcon-7b-instruct-with-patch" >> "$GITHUB_ENV" - echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR_436}/mpt-7b-chat" >> "$GITHUB_ENV" + echo "MPT_7B_ORIGIN_PATH=${ORIGIN_DIR}/mpt-7b-chat" >> "$GITHUB_ENV" echo "WHISPER_TINY_ORIGIN_PATH=${ORIGIN_DIR}/whisper-tiny" >> "$GITHUB_ENV" echo "MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH=${ORIGIN_DIR}/Mistral-7B-Instruct-v0.1" >> "$GITHUB_ENV" - echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR_436}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" + echo "BAICHUAN2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Baichuan2-7B-Chat" >> "$GITHUB_ENV" echo "QWEN_7B_ORIGIN_PATH=${ORIGIN_DIR}/Qwen-7B-Chat" >> "$GITHUB_ENV" echo "VICUNA_7B_1_3_ORIGIN_PATH=${ORIGIN_DIR}/vicuna-7b-v1.3" >> "$GITHUB_ENV" - name: Checkout repo @@ -329,7 +315,7 @@ jobs: fi if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR fi if [ ! -d $FALCON_7B_ORIGIN_PATH ]; then echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..." @@ -337,7 +323,7 @@ jobs: fi if [ ! -d $MPT_7B_ORIGIN_PATH ]; then echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/mpt-7b-chat -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR fi if [ ! -d $WHISPER_TINY_ORIGIN_PATH ]; then echo "Directory $WHISPER_TINY_ORIGIN_PATH not found. Downloading from FTP server..." @@ -364,7 +350,7 @@ jobs: fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/Baichuan2-7B-Chat -P $ORIGIN_DIR_436 + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR fi if [ ! -d $VICUNA_7B_1_3_ORIGIN_PATH ]; then echo "Directory $VICUNA_7B_1_3_ORIGIN_PATH not found. Downloading from FTP server..." diff --git a/python/llm/dev/test/run-example-tests.sh b/python/llm/dev/test/run-example-tests.sh index 3876fa77a68..edf767c8719 100644 --- a/python/llm/dev/test/run-example-tests.sh +++ b/python/llm/dev/test/run-example-tests.sh @@ -68,7 +68,7 @@ fi export ORIGINAL_CHATGLM2_PATH=./llm/chatglm2-6b/ if [ ! -d $ORIGINAL_CHATGLM2_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/${ORIGINAL_CHATGLM2_PATH:6} -P $LLM_DIR + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_PATH:2} -P $LLM_DIR fi echo ">>> Testing ChatGLM2 transformers API" From 687ba8b55499d99985ad9e54a3c9050164bfddb2 Mon Sep 17 00:00:00 2001 From: jenniew Date: Fri, 3 May 2024 23:24:16 -0700 Subject: [PATCH 37/57] update --- .github/actions/llm/setup-llm-env/action.yml | 2 +- python/llm/test/benchmark/arc-perf-test.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128.yaml | 2 +- python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml | 2 +- .../llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml | 2 +- python/llm/test/benchmark/igpu-perf/2048-256.yaml | 2 +- python/llm/test/benchmark/igpu-perf/32-32.yaml | 2 +- python/llm/test/inference/test_transformers_api.py | 6 +++--- .../llm/test/inference_gpu/test_transformers_api_RMSNorm.py | 2 +- .../test/inference_gpu/test_transformers_api_attention.py | 2 +- python/llm/test/inference_gpu/test_transformers_api_mlp.py | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/actions/llm/setup-llm-env/action.yml b/.github/actions/llm/setup-llm-env/action.yml index 01c2660ea8d..4d0b7550f74 100644 --- a/.github/actions/llm/setup-llm-env/action.yml +++ b/.github/actions/llm/setup-llm-env/action.yml @@ -42,4 +42,4 @@ runs: pip install pytest bash python/llm/test/run-llm-install-tests.sh fi - # pip install transformers==4.36.2 + pip install transformers==4.36.2 diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index b08cc5290f6..7552b7f84f1 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -17,7 +17,7 @@ repo_id: # - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'bigscience/bloomz-7b1' # - 'fnlp/moss-moon-003-sft-4bit' -# - 'mistralai/Mistral-7B-v0.1' + - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml index db7ae48114c..582d55e26fd 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' -# - 'mistralai/Mistral-7B-Instruct-v0.1' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml index 6554c68bd80..1208b1b6e63 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml @@ -16,7 +16,7 @@ repo_id: # - 'RWKV/rwkv-4-world-7b' # - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' -# - 'mistralai/Mistral-7B-Instruct-v0.1' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml index 231d4d780d5..5157b56dadc 100644 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml +++ b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' -# - 'mistralai/Mistral-7B-Instruct-v0.1' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/2048-256.yaml b/python/llm/test/benchmark/igpu-perf/2048-256.yaml index c8ff077f919..d4872ad19cc 100644 --- a/python/llm/test/benchmark/igpu-perf/2048-256.yaml +++ b/python/llm/test/benchmark/igpu-perf/2048-256.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' -# - 'mistralai/Mistral-7B-Instruct-v0.1' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 1 num_trials: 3 diff --git a/python/llm/test/benchmark/igpu-perf/32-32.yaml b/python/llm/test/benchmark/igpu-perf/32-32.yaml index f24141206c3..53f09c910d5 100644 --- a/python/llm/test/benchmark/igpu-perf/32-32.yaml +++ b/python/llm/test/benchmark/igpu-perf/32-32.yaml @@ -16,7 +16,7 @@ repo_id: - 'RWKV/rwkv-4-world-7b' - 'RWKV/rwkv-5-world-7b' - 'IEITYuan/Yuan2-2B-hf' -# - 'mistralai/Mistral-7B-Instruct-v0.1' + - 'mistralai/Mistral-7B-Instruct-v0.1' local_model_hub: 'path to your local model hub' warm_up: 3 num_trials: 5 diff --git a/python/llm/test/inference/test_transformers_api.py b/python/llm/test/inference/test_transformers_api.py index db7871fc801..f16773c62c3 100644 --- a/python/llm/test/inference/test_transformers_api.py +++ b/python/llm/test/inference/test_transformers_api.py @@ -116,7 +116,7 @@ def test_transformers_chatglm_for_causallm(self): ]) @pytest.mark.parametrize('Model, Tokenizer, model_path',[ (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH')), - #(AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')), + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH')), ]) def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) @@ -141,11 +141,11 @@ def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): prompt = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun" @pytest.mark.parametrize("Model, Tokenizer, model_path, prompt", [ - # (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), + (AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA_ORIGIN_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('BLOOM_ORIGIN_PATH'), prompt), (AutoModel, AutoTokenizer, os.environ.get('ORIGINAL_CHATGLM2_6B_PATH'), prompt), (AutoModelForCausalLM, AutoTokenizer, os.environ.get('ORIGINAL_CODESHELL_7B_PATH'), prompt), - # (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) + (AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_ORIGIN_PATH'), prompt) ]) def test_optimize_model(Model, Tokenizer, model_path, prompt): diff --git a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py index 5cec634be44..f45f017ef0b 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py +++ b/python/llm/test/inference_gpu/test_transformers_api_RMSNorm.py @@ -30,7 +30,7 @@ TEST_MODEL_LIST = [ ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')), ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')), - # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')), ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), ] diff --git a/python/llm/test/inference_gpu/test_transformers_api_attention.py b/python/llm/test/inference_gpu/test_transformers_api_attention.py index e5c39897cec..83f7aaebfc8 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_attention.py +++ b/python/llm/test/inference_gpu/test_transformers_api_attention.py @@ -32,7 +32,7 @@ ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')), ("Falcon-7B", AutoModelForCausalLM, AutoTokenizer, os.environ.get('FALCON_7B_ORIGIN_PATH')), ("ChatGLM2-6B", AutoModel, AutoTokenizer, os.environ.get('CHATGLM2_6B_ORIGIN_PATH')), - # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Baichuan2-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('BAICHUAN2_7B_ORIGIN_PATH')), ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), ] diff --git a/python/llm/test/inference_gpu/test_transformers_api_mlp.py b/python/llm/test/inference_gpu/test_transformers_api_mlp.py index f7aba58fb72..70ba2e7b9f6 100644 --- a/python/llm/test/inference_gpu/test_transformers_api_mlp.py +++ b/python/llm/test/inference_gpu/test_transformers_api_mlp.py @@ -28,7 +28,7 @@ PROMPT = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun" TEST_MODEL_LIST = [ ("Qwen-7B-Chat", AutoModelForCausalLM, AutoTokenizer, os.environ.get('QWEN_7B_ORIGIN_PATH')), - # ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), + ("Mistral-7B-Instruct-v0.1", AutoModelForCausalLM, AutoTokenizer, os.environ.get('MISTRAL_7B_INSTRUCT_V0_1_ORIGIN_PATH')), ("Llama2-7B", AutoModelForCausalLM, LlamaTokenizer, os.environ.get('LLAMA2_7B_ORIGIN_PATH')) ] From 8099a2cf97eed5a51d84d0602a09957d50b5fc58 Mon Sep 17 00:00:00 2001 From: jenniew Date: Sat, 4 May 2024 15:46:02 -0700 Subject: [PATCH 38/57] update --- .github/workflows/llm_unit_tests.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index abf86908709..09ebf4f4421 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -155,10 +155,13 @@ jobs: # echo "Directory $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..." # wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/chatglm2-6b-q4_0.bin -P $INT4_CKPT_DIR # fi + if [ -d $ORIGINAL_CHATGLM2_6B_PATH ]; then + rm -rf $ORIGINAL_CHATGLM2_6B_PATH + fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR" - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/updated_for_4.36/chatglm2-6b -P $ORIGIN_DIR + wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR fi if [ ! -d $ORIGINAL_CODESHELL_7B_PATH ]; then echo "Directory $ORIGINAL_CODESHELL_7B_PATH not found. Downloading from FTP server..." @@ -313,6 +316,9 @@ jobs: echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR fi + if [ -d $CHATGLM2_6B_ORIGIN_PATH ]; then + rm -rf $CHATGLM2_6B_ORIGIN_PATH + fi if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR @@ -321,6 +327,9 @@ jobs: echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/falcon-7b-instruct-with-patch -P $ORIGIN_DIR fi + if [ -d $MPT_7B_ORIGIN_PATH ]; then + rm -rf $MPT_7B_ORIGIN_PATH + fi if [ ! -d $MPT_7B_ORIGIN_PATH ]; then echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR @@ -348,6 +357,9 @@ jobs: echo "Directory $QWEN_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Qwen-7B-Chat -P $ORIGIN_DIR fi + if [ -d $BAICHUAN2_7B_ORIGIN_PATH ]; then + rm -rf $BAICHUAN2_7B_ORIGIN_PATH + fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR From 270ecb8718c17bac72ef62767cb399cdf7be51b0 Mon Sep 17 00:00:00 2001 From: jenniew Date: Sat, 4 May 2024 23:34:17 -0700 Subject: [PATCH 39/57] update --- .github/workflows/llm_performance_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 337f8361b28..a0411e002d1 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -87,7 +87,7 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - - name: Test on xpu(transformers==4.36.2) + - name: Test on xpu(transformers==4.36.0) shell: bash run: | date_for_test_version=$(date -d yesterday +%Y-%m-%d) From bc847bf75797a8755aedc4b5bdfd6ccd04c873ab Mon Sep 17 00:00:00 2001 From: jenniew Date: Sun, 5 May 2024 00:27:47 -0700 Subject: [PATCH 40/57] update --- .github/workflows/llm-ppl-evaluation.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index bfab5277f80..584d6fcd977 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -143,7 +143,8 @@ jobs: run: | echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV" MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/ - wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + #wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + wget -r -nH --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - name: Upgrade packages shell: bash From 0fcaa40170a51e6f34f0549a8a6b9dc045016f7b Mon Sep 17 00:00:00 2001 From: jenniew Date: Sun, 5 May 2024 13:58:28 -0700 Subject: [PATCH 41/57] update --- .github/workflows/llm-ppl-evaluation.yml | 3 +-- .github/workflows/llm_unit_tests.yml | 12 ------------ 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index 584d6fcd977..bfab5277f80 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -143,8 +143,7 @@ jobs: run: | echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV" MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/ - #wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - wget -r -nH --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - name: Upgrade packages shell: bash diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 09ebf4f4421..0590531a372 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -155,9 +155,6 @@ jobs: # echo "Directory $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..." # wget --no-verbose $LLM_FTP_URL/llm/ggml-actions/stable/chatglm2-6b-q4_0.bin -P $INT4_CKPT_DIR # fi - if [ -d $ORIGINAL_CHATGLM2_6B_PATH ]; then - rm -rf $ORIGINAL_CHATGLM2_6B_PATH - fi if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." echo "wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR" @@ -316,9 +313,6 @@ jobs: echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR fi - if [ -d $CHATGLM2_6B_ORIGIN_PATH ]; then - rm -rf $CHATGLM2_6B_ORIGIN_PATH - fi if [ ! -d $CHATGLM2_6B_ORIGIN_PATH ]; then echo "Directory $CHATGLM2_6B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/chatglm2-6b -P $ORIGIN_DIR @@ -327,9 +321,6 @@ jobs: echo "Directory $FALCON_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/falcon-7b-instruct-with-patch -P $ORIGIN_DIR fi - if [ -d $MPT_7B_ORIGIN_PATH ]; then - rm -rf $MPT_7B_ORIGIN_PATH - fi if [ ! -d $MPT_7B_ORIGIN_PATH ]; then echo "Directory $MPT_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/mpt-7b-chat -P $ORIGIN_DIR @@ -357,9 +348,6 @@ jobs: echo "Directory $QWEN_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Qwen-7B-Chat -P $ORIGIN_DIR fi - if [ -d $BAICHUAN2_7B_ORIGIN_PATH ]; then - rm -rf $BAICHUAN2_7B_ORIGIN_PATH - fi if [ ! -d $BAICHUAN2_7B_ORIGIN_PATH ]; then echo "Directory $BAICHUAN2_7B_ORIGIN_PATH not found. Downloading from FTP server..." wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Baichuan2-7B-Chat -P $ORIGIN_DIR From 26aa194da8400226d10cd723cfc0e8f96009a44d Mon Sep 17 00:00:00 2001 From: jenniew Date: Mon, 6 May 2024 12:07:20 -0700 Subject: [PATCH 42/57] update --- .github/workflows/llm-c-evaluation.yml | 2 +- .github/workflows/llm-whisper-evaluation.yml | 2 +- .github/workflows/llm_performance_tests.yml | 82 +++++++++---------- .../llm_tests_for_stable_version_on_arc.yml | 12 +-- .../llm_tests_for_stable_version_on_spr.yml | 12 +-- 5 files changed, 55 insertions(+), 55 deletions(-) diff --git a/.github/workflows/llm-c-evaluation.yml b/.github/workflows/llm-c-evaluation.yml index 9ca18276c75..464579fcc26 100644 --- a/.github/workflows/llm-c-evaluation.yml +++ b/.github/workflows/llm-c-evaluation.yml @@ -16,7 +16,7 @@ on: branches: [main] paths: - ".github/workflows/llm-c-evaluation.yml" - # Allows you to run this workflow manually from the Actions tab + ## Allows you to run this workflow manually from the Actions tab workflow_dispatch: inputs: model_name: diff --git a/.github/workflows/llm-whisper-evaluation.yml b/.github/workflows/llm-whisper-evaluation.yml index e60eadbf1df..c26d66e726f 100644 --- a/.github/workflows/llm-whisper-evaluation.yml +++ b/.github/workflows/llm-whisper-evaluation.yml @@ -75,7 +75,7 @@ jobs: echo "runner=$runner" >> $GITHUB_OUTPUT llm-whisper-evaluation: - # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + #if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests needs: [llm-cpp-build, set-matrix] # please uncomment it for PR tests # needs: [set-matrix] # please comment it for PR tests strategy: diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index a0411e002d1..2b5706f20a9 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -338,45 +338,16 @@ jobs: # TODO: Put the ipex-llm related install process for win gpu into a action function # Please uncomment it and commment the install from pypi for PR tests - #- name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary - - #- name: Prepare for install ipex-llm from source - # shell: bash - # run: | - # sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - # sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py - - #- name: Install ipex-llm and other related packages (install from source) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf - - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator - - # cd python\llm - # python setup.py clean --all bdist_wheel --win - # if not exist dist\ipex_llm*.whl (exit /b 1) - # for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i - - # pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # pip install transformers==4.36.2 - # pip list - - # call conda deactivate + - name: Download llm binary + uses: ./.github/actions/llm/download-llm-binary - - name: Determine desired ipex-llm version + - name: Prepare for install ipex-llm from source shell: bash run: | - test_version_date=`date -d 'yesterday' '+%Y%m%d'` - echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py - - name: Install ipex-llm and other related packages (install from pypi) + - name: Install ipex-llm and other related packages (install from source) shell: cmd run: | call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y @@ -387,17 +358,46 @@ jobs: pip install --upgrade omegaconf pandas pip install --upgrade tiktoken einops transformers_stream_generator - pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - pip show ipex-llm | findstr %TEST_VERSION_DATE% - if %ERRORLEVEL% neq 0 ( - echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" - exit /b 1 - ) + cd python\llm + python setup.py clean --all bdist_wheel --win + if not exist dist\ipex_llm*.whl (exit /b 1) + for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i + + pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + if %ERRORLEVEL% neq 0 (exit /b 1) pip install transformers==4.36.2 pip list call conda deactivate + #- name: Determine desired ipex-llm version + # shell: bash + # run: | + # test_version_date=`date -d 'yesterday' '+%Y%m%d'` + # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" + + #- name: Install ipex-llm and other related packages (install from pypi) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # call conda activate igpu-perf + + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator + + # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # pip show ipex-llm | findstr %TEST_VERSION_DATE% + # if %ERRORLEVEL% neq 0 ( + # echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" + # exit /b 1 + # ) + # pip install transformers==4.36.2 + # pip list + + # call conda deactivate + - name: Create env for html generation shell: cmd run: | diff --git a/.github/workflows/llm_tests_for_stable_version_on_arc.yml b/.github/workflows/llm_tests_for_stable_version_on_arc.yml index 1b8c48d972d..297236af77d 100644 --- a/.github/workflows/llm_tests_for_stable_version_on_arc.yml +++ b/.github/workflows/llm_tests_for_stable_version_on_arc.yml @@ -10,12 +10,12 @@ permissions: # Controls when the action will run. on: - # pull_request: - # branches: [main] - # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + pull_request: + branches: [main] + paths: + - ".github/workflows/llm_performance_tests.yml" + - "python/llm/test/benchmark/**" + - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: diff --git a/.github/workflows/llm_tests_for_stable_version_on_spr.yml b/.github/workflows/llm_tests_for_stable_version_on_spr.yml index d852499c57b..8a18984cf5a 100644 --- a/.github/workflows/llm_tests_for_stable_version_on_spr.yml +++ b/.github/workflows/llm_tests_for_stable_version_on_spr.yml @@ -10,12 +10,12 @@ permissions: # Controls when the action will run. on: - # pull_request: - # branches: [main] - # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + pull_request: + branches: [main] + paths: + - ".github/workflows/llm_performance_tests.yml" + - "python/llm/test/benchmark/**" + - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: From 22d0bf64007ec003f7a22dd3c795709ce2b3c638 Mon Sep 17 00:00:00 2001 From: jenniew Date: Mon, 6 May 2024 15:24:26 -0700 Subject: [PATCH 43/57] update --- .../stable-version-arc-perf-test-fp8-436.yaml | 27 +++++++++++++++++++ .../stable-version-arc-perf-test-fp8.yaml | 22 +++++++-------- ...le-version-arc-perf-test-sym_int4-436.yaml | 27 +++++++++++++++++++ ...stable-version-arc-perf-test-sym_int4.yaml | 22 +++++++-------- ...table-version-arc-stress-test-fp8-436.yaml | 18 +++++++++++++ .../stable-version-arc-stress-test-fp8.yaml | 8 +++--- ...-version-arc-stress-test-sym_int4-436.yaml | 16 +++++++++++ ...able-version-arc-stress-test-sym_int4.yaml | 6 ++--- .../stable-version-cpu-perf-test-436.yaml | 26 ++++++++++++++++++ .../stable-version-cpu-perf-test.yaml | 10 +++---- .../stable-version-cpu-stress-test.yaml | 10 +++---- 11 files changed, 153 insertions(+), 39 deletions(-) create mode 100644 python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml create mode 100644 python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml create mode 100644 python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml create mode 100644 python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml create mode 100644 python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml new file mode 100644 index 00000000000..0a0795da086 --- /dev/null +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml @@ -0,0 +1,27 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' +local_model_hub: '/mnt/disk0/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '512-256' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) +exclude: + - 'THUDM/chatglm2-6b:2048:8' + - 'THUDM/chatglm3-6b:2048:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' + - 'baichuan-inc/Baichuan2-7B-Chat:512:8' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml index 00884dbe21d..87153f83dcf 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 @@ -23,14 +23,14 @@ exclude: - 'meta-llama/Llama-2-7b-chat-hf:512:8' - 'meta-llama/Llama-2-7b-chat-hf:1024:8' - 'meta-llama/Llama-2-7b-chat-hf:2048:8' - - 'THUDM/chatglm2-6b:2048:8' - - 'THUDM/chatglm3-6b:2048:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' - - 'baichuan-inc/Baichuan2-7B-Chat:512:8' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' +# - 'THUDM/chatglm2-6b:2048:8' +# - 'THUDM/chatglm3-6b:2048:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' +# - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' +# - 'baichuan-inc/Baichuan2-7B-Chat:512:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' - 'Qwen/Qwen-7B-Chat:2048:1' - 'Qwen/Qwen-7B-Chat:1024:2' - 'Qwen/Qwen-7B-Chat:2048:2' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml new file mode 100644 index 00000000000..549b46625fc --- /dev/null +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml @@ -0,0 +1,27 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' +local_model_hub: '/mnt/disk0/models' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '512-256' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) +exclude: + - 'THUDM/chatglm2-6b:2048:8' + - 'THUDM/chatglm3-6b:2048:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' + - 'baichuan-inc/Baichuan2-7B-Chat:512:8' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml index cb9f7b30e9c..d31f892fe17 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 @@ -22,14 +22,14 @@ exclude: - 'meta-llama/Llama-2-7b-chat-hf:2048:4' - 'meta-llama/Llama-2-7b-chat-hf:1024:8' - 'meta-llama/Llama-2-7b-chat-hf:2048:8' - - 'THUDM/chatglm2-6b:2048:8' - - 'THUDM/chatglm3-6b:2048:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' - - 'baichuan-inc/Baichuan2-7B-Chat:512:8' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' +# - 'THUDM/chatglm2-6b:2048:8' +# - 'THUDM/chatglm3-6b:2048:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' +# - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' +# - 'baichuan-inc/Baichuan2-7B-Chat:512:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' - 'Qwen/Qwen-7B-Chat:2048:2' - 'Qwen/Qwen-7B-Chat:1024:4' - 'Qwen/Qwen-7B-Chat:2048:4' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml new file mode 100644 index 00000000000..e887328e435 --- /dev/null +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml @@ -0,0 +1,18 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' +local_model_hub: '/mnt/disk0/models' +warm_up: 10 +num_trials: 100 +num_beams: 1 # default to greedy search +low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '1024-512' + - '2048-512' +test_api: + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) +exclude: + - 'baichuan-inc/Baichuan2-7B-Chat:2048' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml index bc64ad92305..7e2f14042ef 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 10 @@ -17,5 +17,5 @@ test_api: - "transformer_int4_gpu" # on Intel GPU cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: - - 'baichuan-inc/Baichuan2-7B-Chat:2048' +# - 'baichuan-inc/Baichuan2-7B-Chat:2048' - 'Qwen/Qwen-7B-Chat:2048' \ No newline at end of file diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml new file mode 100644 index 00000000000..ef0b6324eb5 --- /dev/null +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml @@ -0,0 +1,16 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + local_model_hub: '/mnt/disk0/models' +warm_up: 10 +num_trials: 100 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '1024-512' + - '2048-512' +test_api: + - "transformer_int4_gpu" # on Intel GPU +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml index 119a67e32fa..405c0010a05 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 10 diff --git a/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml b/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml new file mode 100644 index 00000000000..c7ba0c1ded8 --- /dev/null +++ b/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml @@ -0,0 +1,26 @@ +repo_id: + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat' +local_model_hub: '/mnt/disk1/models/updated_for_4.36' +warm_up: 1 +num_trials: 3 +num_beams: 1 # default to greedy search +low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) +batch_size: 1 # default to 1 +in_out_pairs: + - '32-32' + - '1024-128' + - '2048-256' +test_api: + - "transformer_int4" + # - "native_int4" + # - "optimize_model" + # - "pytorch_autocast_bf16" + # - "ipex_fp16_gpu" # on Intel GPU + # - "transformer_int4_gpu" # on Intel GPU + # - "optimize_model_gpu" # on Intel GPU + # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server + # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) +cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml index aa9158bdd13..6095e2ed805 100644 --- a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml @@ -1,12 +1,12 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' -local_model_hub: '/models' +local_model_hub: '/mnt/disk1/models' warm_up: 1 num_trials: 3 num_beams: 1 # default to greedy search diff --git a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml index 38aeb375910..ee17bbaf53a 100644 --- a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml @@ -1,12 +1,12 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat' +# - 'THUDM/chatglm2-6b' +# - 'THUDM/chatglm3-6b' +# - 'baichuan-inc/Baichuan2-7B-Chat' +# - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' -local_model_hub: '/models' +local_model_hub: '/mnt/disk1/models' warm_up: 3 num_trials: 50 num_beams: 1 # default to greedy search From 65ea875221e27c7f3816c00a24a6e6766c4cbc9f Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 7 May 2024 15:18:51 -0700 Subject: [PATCH 44/57] update --- .github/workflows/llm-ppl-evaluation.yml | 5 ++-- .github/workflows/llm_performance_tests.yml | 23 ++----------------- python/llm/test/benchmark/arc-perf-test.yaml | 12 +++++----- python/llm/test/benchmark/cpu-perf-test.yaml | 8 +++---- .../stable-version-arc-perf-test-fp8.yaml | 6 ++--- ...stable-version-arc-perf-test-sym_int4.yaml | 6 ++--- .../stable-version-arc-stress-test-fp8.yaml | 6 ++--- ...able-version-arc-stress-test-sym_int4.yaml | 6 ++--- .../stable-version-cpu-perf-test.yaml | 8 +++---- .../stable-version-cpu-stress-test.yaml | 8 +++---- 10 files changed, 35 insertions(+), 53 deletions(-) diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index bfab5277f80..ceb53f8b378 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -71,7 +71,7 @@ jobs: if: ${{github.event_name == 'pull_request'}} env: PR_MATRIX_SEQ_LEN: '["512"]' - PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b"]' + PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b", "chatglm3-6b", "Baichuan2-7B-Chat","mpt-7b-chat"]' PR_MATRIX_PRECISION: '["sym_int4"]' PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' run: | @@ -143,7 +143,8 @@ jobs: run: | echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV" MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/ - wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + #wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + wget -r -nH --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - name: Upgrade packages shell: bash diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 2b5706f20a9..55a6bcb67d6 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -102,13 +102,6 @@ jobs: # change csv name sed -i 's/{today}/{today}_test1/g' run.py python run.py - # run updated models for 4.36 - cd - - cp python/llm/test/benchmark/arc-perf-test-436.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - # change csv name - sed -i 's/test1/test2/g' run.py - python run.py #- name: Test on xpu(transformers==4.34.0) # shell: bash @@ -135,7 +128,7 @@ jobs: cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml cd python/llm/dev/benchmark/all-in-one # change csv name - sed -i 's/test2/test3/g' run.py + sed -i 's/test1/test2/g' run.py python run.py - name: Concat csv and generate html @@ -157,8 +150,7 @@ jobs: run: | cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml - python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-test-436.yaml - python ../../../test/benchmark/check_results.py -c test3 -y ../../../test/benchmark/arc-perf-transformers-437.yaml + python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-437.yaml # python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml find . -name "*test*.csv" -delete if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then @@ -227,18 +219,7 @@ jobs: export OMP_NUM_THREADS=48 # hide time info sed -i 's/str(end - st)/"xxxxxx"/g' run.py - # change csv name - sed -i 's/{today}/{today}_test1/g' run.py python run.py - # run updated models for 4.36 - cd - - cp python/llm/test/benchmark/cpu-perf-test-436.yaml python/llm/dev/benchmark/all-in-one/config.yaml - cd python/llm/dev/benchmark/all-in-one - # change csv name - sed -i 's/test1/test2/g' run.py - python run.py - python ../../../test/benchmark/concat_csv.py - find . -name "*test*.csv" -delete cp ./*.csv /mnt/disk1/models/nightly_perf_cpu cd ../../../test/benchmark python -m pip install pandas==1.5.3 diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 7552b7f84f1..4f1d6159dc9 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -1,22 +1,22 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' -# - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm2-6b' - 'THUDM/chatglm3-6b-4bit' - 'tiiuae/falcon-7b-instruct-with-patch' -# - 'mosaicml/mpt-7b-chat' + - 'mosaicml/mpt-7b-chat' - 'redpajama/gptneox-7b-redpajama-bf16' - 'bigcode/starcoder-15.5b-4bit' - 'databricks/dolly-v1-6b' - 'databricks/dolly-v2-7b' - 'databricks/dolly-v2-12b' -# - 'internlm/internlm-chat-7b' + - 'internlm/internlm-chat-7b' - 'Qwen/Qwen-7B-Chat' - 'BAAI/AquilaChat-7B' -# - 'baichuan-inc/Baichuan2-7B-Chat' -# - 'baichuan-inc/Baichuan2-13B-Chat-4bit' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - 'bigscience/bloomz-7b1' -# - 'fnlp/moss-moon-003-sft-4bit' +# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+ - 'mistralai/Mistral-7B-v0.1' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/cpu-perf-test.yaml b/python/llm/test/benchmark/cpu-perf-test.yaml index 6095e2ed805..92b12750dbb 100644 --- a/python/llm/test/benchmark/cpu-perf-test.yaml +++ b/python/llm/test/benchmark/cpu-perf-test.yaml @@ -1,10 +1,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' -# - 'baichuan-inc/Baichuan2-13B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml index 87153f83dcf..0cd4a9b2fe3 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml index d31f892fe17..b1a2b2536c0 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml index 7e2f14042ef..80a67d71e3f 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 10 diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml index 405c0010a05..119a67e32fa 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4.yaml @@ -1,8 +1,8 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' - 'Qwen/Qwen-7B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 10 diff --git a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml index 6095e2ed805..92b12750dbb 100644 --- a/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-perf-test.yaml @@ -1,10 +1,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' -# - 'baichuan-inc/Baichuan2-13B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 1 diff --git a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml index ee17bbaf53a..f8c75489659 100644 --- a/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml +++ b/python/llm/test/benchmark/stable-version-cpu-stress-test.yaml @@ -1,10 +1,10 @@ repo_id: - 'meta-llama/Llama-2-7b-chat-hf' - 'meta-llama/Llama-2-13b-chat-hf' -# - 'THUDM/chatglm2-6b' -# - 'THUDM/chatglm3-6b' -# - 'baichuan-inc/Baichuan2-7B-Chat' -# - 'baichuan-inc/Baichuan2-13B-Chat' + - 'THUDM/chatglm2-6b' + - 'THUDM/chatglm3-6b' + - 'baichuan-inc/Baichuan2-7B-Chat' + - 'baichuan-inc/Baichuan2-13B-Chat' - 'Qwen/Qwen-14B-Chat' local_model_hub: '/mnt/disk1/models' warm_up: 3 From 4f98a3887b25831ddd3870c67c492ae91b52e252 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 7 May 2024 15:27:18 -0700 Subject: [PATCH 45/57] update --- .../stable-version-arc-perf-test-fp8.yaml | 16 ++++++++-------- .../stable-version-arc-perf-test-sym_int4.yaml | 16 ++++++++-------- .../stable-version-arc-stress-test-fp8.yaml | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml index 0cd4a9b2fe3..00884dbe21d 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml @@ -23,14 +23,14 @@ exclude: - 'meta-llama/Llama-2-7b-chat-hf:512:8' - 'meta-llama/Llama-2-7b-chat-hf:1024:8' - 'meta-llama/Llama-2-7b-chat-hf:2048:8' -# - 'THUDM/chatglm2-6b:2048:8' -# - 'THUDM/chatglm3-6b:2048:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' -# - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' -# - 'baichuan-inc/Baichuan2-7B-Chat:512:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' + - 'THUDM/chatglm2-6b:2048:8' + - 'THUDM/chatglm3-6b:2048:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' + - 'baichuan-inc/Baichuan2-7B-Chat:512:8' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' - 'Qwen/Qwen-7B-Chat:2048:1' - 'Qwen/Qwen-7B-Chat:1024:2' - 'Qwen/Qwen-7B-Chat:2048:2' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml index b1a2b2536c0..cb9f7b30e9c 100644 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml +++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml @@ -22,14 +22,14 @@ exclude: - 'meta-llama/Llama-2-7b-chat-hf:2048:4' - 'meta-llama/Llama-2-7b-chat-hf:1024:8' - 'meta-llama/Llama-2-7b-chat-hf:2048:8' -# - 'THUDM/chatglm2-6b:2048:8' -# - 'THUDM/chatglm3-6b:2048:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' -# - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' -# - 'baichuan-inc/Baichuan2-7B-Chat:512:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' -# - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' + - 'THUDM/chatglm2-6b:2048:8' + - 'THUDM/chatglm3-6b:2048:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' + - 'baichuan-inc/Baichuan2-7B-Chat:512:8' + - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' + - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' - 'Qwen/Qwen-7B-Chat:2048:2' - 'Qwen/Qwen-7B-Chat:1024:4' - 'Qwen/Qwen-7B-Chat:2048:4' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml index 80a67d71e3f..bc64ad92305 100644 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml +++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml @@ -17,5 +17,5 @@ test_api: - "transformer_int4_gpu" # on Intel GPU cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) exclude: -# - 'baichuan-inc/Baichuan2-7B-Chat:2048' + - 'baichuan-inc/Baichuan2-7B-Chat:2048' - 'Qwen/Qwen-7B-Chat:2048' \ No newline at end of file From c64ec3389dd2e8f86b06d7b544b7dfaad49dddc9 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 7 May 2024 16:45:38 -0700 Subject: [PATCH 46/57] update --- .github/workflows/llm-ppl-evaluation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index ceb53f8b378..faf24ea3607 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -71,7 +71,7 @@ jobs: if: ${{github.event_name == 'pull_request'}} env: PR_MATRIX_SEQ_LEN: '["512"]' - PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b", "chatglm3-6b", "Baichuan2-7B-Chat","mpt-7b-chat"]' + PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b", "chatglm2-6b", "Baichuan2-7B-Chat","mpt-7b-chat"]' PR_MATRIX_PRECISION: '["sym_int4"]' PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' run: | From 9c9e92d506053a106a5026c8af2a280bc73e0e4c Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 7 May 2024 22:48:09 -0700 Subject: [PATCH 47/57] update --- python/llm/test/benchmark/arc-perf-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index 4f1d6159dc9..df4b1465625 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -35,3 +35,4 @@ exclude: # - 'fnlp/moss-moon-003-sft-4bit:1024' # - 'fnlp/moss-moon-003-sft-4bit:2048' - 'bigscience/bloomz-7b1:2048' + - 'mosaicml/mpt-7b-chat:2048' From 4b04c45b38733f3d98e3ce7e0084f650f13143b6 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 11:34:29 -0700 Subject: [PATCH 48/57] update --- .github/workflows/llm-ppl-evaluation.yml | 7 +++-- .../llm/test/benchmark/arc-perf-test-436.yaml | 22 --------------- python/llm/test/benchmark/arc-perf-test.yaml | 2 +- .../llm/test/benchmark/cpu-perf-test-436.yaml | 26 ------------------ .../stable-version-arc-perf-test-fp8-436.yaml | 27 ------------------- ...le-version-arc-perf-test-sym_int4-436.yaml | 27 ------------------- ...table-version-arc-stress-test-fp8-436.yaml | 18 ------------- ...-version-arc-stress-test-sym_int4-436.yaml | 16 ----------- .../stable-version-cpu-perf-test-436.yaml | 26 ------------------ 9 files changed, 4 insertions(+), 167 deletions(-) delete mode 100644 python/llm/test/benchmark/arc-perf-test-436.yaml delete mode 100644 python/llm/test/benchmark/cpu-perf-test-436.yaml delete mode 100644 python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml delete mode 100644 python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml delete mode 100644 python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml delete mode 100644 python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml delete mode 100644 python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index faf24ea3607..4efc6f2f795 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -71,7 +71,7 @@ jobs: if: ${{github.event_name == 'pull_request'}} env: PR_MATRIX_SEQ_LEN: '["512"]' - PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b", "chatglm2-6b", "Baichuan2-7B-Chat","mpt-7b-chat"]' + PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "chatglm3-6b"]' PR_MATRIX_PRECISION: '["sym_int4"]' PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' run: | @@ -143,9 +143,8 @@ jobs: run: | echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV" MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/ - #wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - wget -r -nH --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} - + wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR} + - name: Upgrade packages shell: bash run: | diff --git a/python/llm/test/benchmark/arc-perf-test-436.yaml b/python/llm/test/benchmark/arc-perf-test-436.yaml deleted file mode 100644 index f619987bfc9..00000000000 --- a/python/llm/test/benchmark/arc-perf-test-436.yaml +++ /dev/null @@ -1,22 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'mosaicml/mpt-7b-chat' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat-4bit' - - 'internlm/internlm-chat-7b' -local_model_hub: '/mnt/disk0/models' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) -exclude: - - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - - 'mosaicml/mpt-7b-chat:2048' diff --git a/python/llm/test/benchmark/arc-perf-test.yaml b/python/llm/test/benchmark/arc-perf-test.yaml index df4b1465625..895588ce4e4 100644 --- a/python/llm/test/benchmark/arc-perf-test.yaml +++ b/python/llm/test/benchmark/arc-perf-test.yaml @@ -34,5 +34,5 @@ cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu w exclude: # - 'fnlp/moss-moon-003-sft-4bit:1024' # - 'fnlp/moss-moon-003-sft-4bit:2048' + - 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048' - 'bigscience/bloomz-7b1:2048' - - 'mosaicml/mpt-7b-chat:2048' diff --git a/python/llm/test/benchmark/cpu-perf-test-436.yaml b/python/llm/test/benchmark/cpu-perf-test-436.yaml deleted file mode 100644 index c7ba0c1ded8..00000000000 --- a/python/llm/test/benchmark/cpu-perf-test-436.yaml +++ /dev/null @@ -1,26 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat' -local_model_hub: '/mnt/disk1/models/updated_for_4.36' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4" - # - "native_int4" - # - "optimize_model" - # - "pytorch_autocast_bf16" - # - "ipex_fp16_gpu" # on Intel GPU - # - "transformer_int4_gpu" # on Intel GPU - # - "optimize_model_gpu" # on Intel GPU - # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server - # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml deleted file mode 100644 index 0a0795da086..00000000000 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8-436.yaml +++ /dev/null @@ -1,27 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' -local_model_hub: '/mnt/disk0/models' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '512-256' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) -exclude: - - 'THUDM/chatglm2-6b:2048:8' - - 'THUDM/chatglm3-6b:2048:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' - - 'baichuan-inc/Baichuan2-7B-Chat:512:8' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml deleted file mode 100644 index 549b46625fc..00000000000 --- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4-436.yaml +++ /dev/null @@ -1,27 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' -local_model_hub: '/mnt/disk0/models' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '512-256' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) -exclude: - - 'THUDM/chatglm2-6b:2048:8' - - 'THUDM/chatglm3-6b:2048:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:2' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:4' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:4' - - 'baichuan-inc/Baichuan2-7B-Chat:512:8' - - 'baichuan-inc/Baichuan2-7B-Chat:1024:8' - - 'baichuan-inc/Baichuan2-7B-Chat:2048:8' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml deleted file mode 100644 index e887328e435..00000000000 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8-436.yaml +++ /dev/null @@ -1,18 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' -local_model_hub: '/mnt/disk0/models' -warm_up: 10 -num_trials: 100 -num_beams: 1 # default to greedy search -low_bit: 'fp8' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '1024-512' - - '2048-512' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) -exclude: - - 'baichuan-inc/Baichuan2-7B-Chat:2048' diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml deleted file mode 100644 index ef0b6324eb5..00000000000 --- a/python/llm/test/benchmark/stable-version-arc-stress-test-sym_int4-436.yaml +++ /dev/null @@ -1,16 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - local_model_hub: '/mnt/disk0/models' -warm_up: 10 -num_trials: 100 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '1024-512' - - '2048-512' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml b/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml deleted file mode 100644 index c7ba0c1ded8..00000000000 --- a/python/llm/test/benchmark/stable-version-cpu-perf-test-436.yaml +++ /dev/null @@ -1,26 +0,0 @@ -repo_id: - - 'THUDM/chatglm2-6b' - - 'THUDM/chatglm3-6b' - - 'baichuan-inc/Baichuan2-7B-Chat' - - 'baichuan-inc/Baichuan2-13B-Chat' -local_model_hub: '/mnt/disk1/models/updated_for_4.36' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4" - # - "native_int4" - # - "optimize_model" - # - "pytorch_autocast_bf16" - # - "ipex_fp16_gpu" # on Intel GPU - # - "transformer_int4_gpu" # on Intel GPU - # - "optimize_model_gpu" # on Intel GPU - # - "deepspeed_transformer_int4_cpu" # on Intel SPR Server - # - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) From a533ae8fcf937f04ee8d90cbf76c557cd8585857 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 11:58:30 -0700 Subject: [PATCH 49/57] delete --- .github/workflows/llm-c-evaluation.yml | 2 +- .github/workflows/llm-harness-evaluation.yml | 6 - .github/workflows/llm-ppl-evaluation.yml | 7 +- .github/workflows/llm_performance_tests.yml | 137 +----------------- .github/workflows/llm_unit_tests.yml | 4 +- .../benchmark/arc-perf-transformers-434.yaml | 16 -- .../test/run-llm-inference-tests-gpu-434.sh | 30 ---- 7 files changed, 4 insertions(+), 198 deletions(-) delete mode 100644 python/llm/test/benchmark/arc-perf-transformers-434.yaml delete mode 100644 python/llm/test/run-llm-inference-tests-gpu-434.sh diff --git a/.github/workflows/llm-c-evaluation.yml b/.github/workflows/llm-c-evaluation.yml index 464579fcc26..9ca18276c75 100644 --- a/.github/workflows/llm-c-evaluation.yml +++ b/.github/workflows/llm-c-evaluation.yml @@ -16,7 +16,7 @@ on: branches: [main] paths: - ".github/workflows/llm-c-evaluation.yml" - ## Allows you to run this workflow manually from the Actions tab + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: inputs: model_name: diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 29146ca116e..e3e1993a9c0 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -164,12 +164,6 @@ jobs: shell: bash run: | pip install --upgrade datasets==2.14.6 - #if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then - # pip install --upgrade transformers==4.36 - #else - # pip install --upgrade transformers==4.31 - #fi - - name: Run harness shell: bash diff --git a/.github/workflows/llm-ppl-evaluation.yml b/.github/workflows/llm-ppl-evaluation.yml index 4efc6f2f795..7c2037ff318 100644 --- a/.github/workflows/llm-ppl-evaluation.yml +++ b/.github/workflows/llm-ppl-evaluation.yml @@ -148,12 +148,7 @@ jobs: - name: Upgrade packages shell: bash run: | - pip install --upgrade datasets==2.14.6 - #if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then - # pip install --upgrade transformers==4.36 - #else - # pip install --upgrade transformers==4.31 - #fi + pip install --upgrade datasets==2.14.6 - name: Run perplexity shell: bash diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 55a6bcb67d6..3bb6ab6950b 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -87,7 +87,7 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - - name: Test on xpu(transformers==4.36.0) + - name: Test on xpu(transformers==4.36.2) shell: bash run: | date_for_test_version=$(date -d yesterday +%Y-%m-%d) @@ -103,20 +103,6 @@ jobs: sed -i 's/{today}/{today}_test1/g' run.py python run.py - #- name: Test on xpu(transformers==4.34.0) - # shell: bash - # run: | - # source /opt/intel/oneapi/setvars.sh - # export USE_XETLA=OFF - # export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - # upgrade transformers for model Mistral-7B-v0.1 - # python -m pip install transformers==4.34.0 - # cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml - # cd python/llm/dev/benchmark/all-in-one - # change csv name - # sed -i 's/test1/test2/g' run.py - # python run.py - - name: Test on xpu(transformers==4.37.0) shell: bash run: | @@ -151,7 +137,6 @@ jobs: cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-437.yaml - # python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml find . -name "*test*.csv" -delete if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/ @@ -429,30 +414,6 @@ jobs: call conda deactivate - #- name: Prepare igpu perf test for Mistral (32-32) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_434.yaml - - #- name: Test on igpu for Mistral (32-32) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - name: Prepare igpu perf test for Qwen1.5 (32-32) shell: bash run: | @@ -523,30 +484,6 @@ jobs: call conda deactivate - #- name: Prepare igpu perf test for Mistral (1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_434.yaml - - #- name: Test on igpu for Mistral (1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (1024-128) shell: bash run: | @@ -616,30 +553,6 @@ jobs: call conda deactivate - #- name: Prepare igpu perf test for Mistral (2048-256) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_434.yaml - - #- name: Test on igpu for Mistral (2048-256) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (2048-256) shell: bash run: | @@ -709,30 +622,6 @@ jobs: call conda deactivate - #- name: Prepare igpu perf test for Mistral (load_low_bit 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml - - #- name: Test on igpu for Mistral (load_low_bit 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_loadlowbit_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_loadlowbit\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (load_low_bit 1024-128) shell: bash run: | @@ -800,30 +689,6 @@ jobs: call conda deactivate - #- name: Prepare igpu perf test for Mistral (int4+fp16 1024-128) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml - - #- name: Test on igpu for Mistral (int4+fp16 1024-128) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.34.0 - - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_434.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - name: Prepare igpu perf test for Qwen 1.5 (int4+fp16 1024-128) shell: bash run: | diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 583cbf2c31e..7a4d4bf6d6c 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -367,9 +367,7 @@ jobs: source /home/arda/intel/oneapi/setvars.sh fi python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator - bash python/llm/test/run-llm-inference-tests-gpu.sh - # python -m pip install transformers==4.34.0 - # bash python/llm/test/run-llm-inference-tests-gpu-434.sh + bash python/llm/test/run-llm-inference-tests-gpu.sh4.sh - name: Run LLM example tests shell: bash diff --git a/python/llm/test/benchmark/arc-perf-transformers-434.yaml b/python/llm/test/benchmark/arc-perf-transformers-434.yaml deleted file mode 100644 index 1389e44ab5a..00000000000 --- a/python/llm/test/benchmark/arc-perf-transformers-434.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# For the models that require transformers 4.34.0 -repo_id: - - 'mistralai/Mistral-7B-v0.1' -local_model_hub: '/mnt/disk1/models' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' - - '1024-128' - - '2048-256' -test_api: - - "transformer_int4_gpu" # on Intel GPU -cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/run-llm-inference-tests-gpu-434.sh b/python/llm/test/run-llm-inference-tests-gpu-434.sh deleted file mode 100644 index 91a1676ddf8..00000000000 --- a/python/llm/test/run-llm-inference-tests-gpu-434.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -export ANALYTICS_ZOO_ROOT=${ANALYTICS_ZOO_ROOT} -export LLM_HOME=${ANALYTICS_ZOO_ROOT}/python/llm/src -export LLM_INFERENCE_TEST_DIR=${ANALYTICS_ZOO_ROOT}/python/llm/test/inference_gpu - -export USE_XETLA=OFF -export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 -export DEVICE='xpu' - -set -e - -echo "# Start testing inference" -start=$(date "+%s") - -# if [ -z "$THREAD_NUM" ]; then -# THREAD_NUM=2 -# fi -# export OMP_NUM_THREADS=$THREAD_NUM -export BIGDL_LLM_XMX_DISABLED=1 -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_attention.py -v -s -k "Mistral" -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_mlp.py -v -s -k "Mistral" -pytest ${LLM_INFERENCE_TEST_DIR}/test_transformers_api_RMSNorm.py -v -s -k "Mistral" -unset BIGDL_LLM_XMX_DISABLED - -now=$(date "+%s") -time=$((now-start)) - -echo "Bigdl-llm gpu inference tests for transformers 4.34.0 finished" -echo "Time used:$time seconds" From 4af144535ed413e8ec50785fabc6883f38aaf0b3 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 12:23:37 -0700 Subject: [PATCH 50/57] update --- .github/workflows/llm_performance_tests.yml | 3 +-- .github/workflows/llm_unit_tests.yml | 8 ++------ python/llm/setup.py | 5 ++--- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 3bb6ab6950b..17e589e5d4b 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -331,7 +331,7 @@ jobs: pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ if %ERRORLEVEL% neq 0 (exit /b 1) - pip install transformers==4.36.2 + # pip install transformers==4.36.2 pip list call conda deactivate @@ -359,7 +359,6 @@ jobs: # echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" # exit /b 1 # ) - # pip install transformers==4.36.2 # pip list # call conda deactivate diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 7a4d4bf6d6c..8b6c1426af1 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -210,12 +210,7 @@ jobs: - name: Run LLM cli test (Windows) if: runner.os == 'Windows' uses: ./.github/actions/llm/cli-test-windows - # - name: Run LLM inference test - # shell: bash - # run: | - # python -m pip install einops datasets librosa openai-whisper - # bash python/llm/test/run-llm-inference-tests.sh - - name: Run LLM inference test for 4.36 + - name: Run LLM inference test shell: bash run: | python -m pip install einops datasets librosa openai-whisper @@ -239,6 +234,7 @@ jobs: shell: bash run: | pip uninstall sentence-transformers -y || true + llm-unit-test-on-arc: needs: [setup-python-version, llm-cpp-build] strategy: diff --git a/python/llm/setup.py b/python/llm/setup.py index ceadccdffa2..cc94a0e22e5 100644 --- a/python/llm/setup.py +++ b/python/llm/setup.py @@ -52,7 +52,7 @@ libs_dir = os.path.join(llm_home, "ipex_llm", "libs") CONVERT_DEP = ['numpy == 1.26.4', # lastet 2.0.0b1 will cause error 'torch', - 'transformers == 4.31.0', 'sentencepiece', 'tokenizers == 0.13.3', + 'transformers == 4.36.2', 'sentencepiece', 'tokenizers == 0.13.3', # TODO: Support accelerate 0.22.0 'accelerate == 0.21.0', 'tabulate'] SERVING_DEP = ['fschat[model_worker, webui] == 0.2.36', 'protobuf'] @@ -277,10 +277,9 @@ def setup_package(): # Add internal requires for llama-index llama_index_requires = copy.deepcopy(all_requires) - for exclude_require in ['torch', 'transformers == 4.31.0', 'tokenizers == 0.13.3']: + for exclude_require in ['torch', 'tokenizers == 0.13.3']: llama_index_requires.remove(exclude_require) llama_index_requires += ["torch<2.2.0", - "transformers>=4.34.0,<4.39.0", "sentence-transformers~=2.6.1"] From 1f9135368ac069870892bf2a4771555ce204f2e8 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 12:25:12 -0700 Subject: [PATCH 51/57] update --- .github/actions/llm/setup-llm-env/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/llm/setup-llm-env/action.yml b/.github/actions/llm/setup-llm-env/action.yml index 4d0b7550f74..4b25ea0c401 100644 --- a/.github/actions/llm/setup-llm-env/action.yml +++ b/.github/actions/llm/setup-llm-env/action.yml @@ -42,4 +42,3 @@ runs: pip install pytest bash python/llm/test/run-llm-install-tests.sh fi - pip install transformers==4.36.2 From 069649186a90024fadfa15a2f3ea467b2ab5049b Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 12:30:34 -0700 Subject: [PATCH 52/57] update --- .github/workflows/llm_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 8b6c1426af1..55bfcaad685 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -363,7 +363,7 @@ jobs: source /home/arda/intel/oneapi/setvars.sh fi python -m pip install datasets librosa soundfile einops tiktoken transformers_stream_generator - bash python/llm/test/run-llm-inference-tests-gpu.sh4.sh + bash python/llm/test/run-llm-inference-tests-gpu.sh - name: Run LLM example tests shell: bash From 6922dc772fc9ef6d2480d11dcacf0b75130b1d5a Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 8 May 2024 15:30:12 -0700 Subject: [PATCH 53/57] update --- python/llm/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/llm/setup.py b/python/llm/setup.py index cc94a0e22e5..7dd88fd1635 100644 --- a/python/llm/setup.py +++ b/python/llm/setup.py @@ -52,7 +52,7 @@ libs_dir = os.path.join(llm_home, "ipex_llm", "libs") CONVERT_DEP = ['numpy == 1.26.4', # lastet 2.0.0b1 will cause error 'torch', - 'transformers == 4.36.2', 'sentencepiece', 'tokenizers == 0.13.3', + 'transformers == 4.36.2', 'sentencepiece', 'tokenizers == 0.15.2', # TODO: Support accelerate 0.22.0 'accelerate == 0.21.0', 'tabulate'] SERVING_DEP = ['fschat[model_worker, webui] == 0.2.36', 'protobuf'] @@ -277,7 +277,7 @@ def setup_package(): # Add internal requires for llama-index llama_index_requires = copy.deepcopy(all_requires) - for exclude_require in ['torch', 'tokenizers == 0.13.3']: + for exclude_require in ['torch']: llama_index_requires.remove(exclude_require) llama_index_requires += ["torch<2.2.0", "sentence-transformers~=2.6.1"] From 641772688792e47205ec691fc7bfa6121984f6e2 Mon Sep 17 00:00:00 2001 From: jenniew Date: Tue, 14 May 2024 14:33:56 -0700 Subject: [PATCH 54/57] update --- .github/workflows/llm-whisper-evaluation.yml | 2 +- .../llm_tests_for_stable_version_on_arc.yml | 12 ++++++------ .../llm_tests_for_stable_version_on_spr.yml | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/llm-whisper-evaluation.yml b/.github/workflows/llm-whisper-evaluation.yml index c26d66e726f..e60eadbf1df 100644 --- a/.github/workflows/llm-whisper-evaluation.yml +++ b/.github/workflows/llm-whisper-evaluation.yml @@ -75,7 +75,7 @@ jobs: echo "runner=$runner" >> $GITHUB_OUTPUT llm-whisper-evaluation: - #if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests needs: [llm-cpp-build, set-matrix] # please uncomment it for PR tests # needs: [set-matrix] # please comment it for PR tests strategy: diff --git a/.github/workflows/llm_tests_for_stable_version_on_arc.yml b/.github/workflows/llm_tests_for_stable_version_on_arc.yml index 297236af77d..1b8c48d972d 100644 --- a/.github/workflows/llm_tests_for_stable_version_on_arc.yml +++ b/.github/workflows/llm_tests_for_stable_version_on_arc.yml @@ -10,12 +10,12 @@ permissions: # Controls when the action will run. on: - pull_request: - branches: [main] - paths: - - ".github/workflows/llm_performance_tests.yml" - - "python/llm/test/benchmark/**" - - "python/llm/dev/benchmark/all-in-one/**" + # pull_request: + # branches: [main] + # paths: + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: diff --git a/.github/workflows/llm_tests_for_stable_version_on_spr.yml b/.github/workflows/llm_tests_for_stable_version_on_spr.yml index 8a18984cf5a..ef02ce07524 100644 --- a/.github/workflows/llm_tests_for_stable_version_on_spr.yml +++ b/.github/workflows/llm_tests_for_stable_version_on_spr.yml @@ -10,12 +10,12 @@ permissions: # Controls when the action will run. on: - pull_request: - branches: [main] - paths: - - ".github/workflows/llm_performance_tests.yml" - - "python/llm/test/benchmark/**" - - "python/llm/dev/benchmark/all-in-one/**" + # pull_request: + # branches: [main] + # paths: + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: From ec2cd5e5a1241d078379d0b3195dd3a063d7fa78 Mon Sep 17 00:00:00 2001 From: jenniew Date: Wed, 15 May 2024 14:24:37 -0700 Subject: [PATCH 55/57] update --- .../llm/test/benchmark/igpu-perf/1024-128_434.yaml | 13 ------------- .../benchmark/igpu-perf/1024-128_int4_fp16_434.yaml | 13 ------------- .../igpu-perf/1024-128_loadlowbit_434.yaml | 13 ------------- .../llm/test/benchmark/igpu-perf/2048-256_434.yaml | 13 ------------- python/llm/test/benchmark/igpu-perf/32-32_434.yaml | 13 ------------- 5 files changed, 65 deletions(-) delete mode 100644 python/llm/test/benchmark/igpu-perf/1024-128_434.yaml delete mode 100644 python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml delete mode 100644 python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml delete mode 100644 python/llm/test/benchmark/igpu-perf/2048-256_434.yaml delete mode 100644 python/llm/test/benchmark/igpu-perf/32-32_434.yaml diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml deleted file mode 100644 index b4b1e9b7a4f..00000000000 --- a/python/llm/test/benchmark/igpu-perf/1024-128_434.yaml +++ /dev/null @@ -1,13 +0,0 @@ -repo_id: - - 'mistralai/Mistral-7B-Instruct-v0.1' -local_model_hub: 'path to your local model hub' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '1024-128' -test_api: - - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml deleted file mode 100644 index 57f0a3d3c8e..00000000000 --- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_434.yaml +++ /dev/null @@ -1,13 +0,0 @@ -repo_id: - - 'mistralai/Mistral-7B-Instruct-v0.1' -local_model_hub: 'path to your local model hub' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '1024-128' -test_api: - - "transformer_int4_fp16_gpu_win" # on Intel GPU for Windows, use fp16 for non-linear layer -cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml deleted file mode 100644 index 51453bd1b6a..00000000000 --- a/python/llm/test/benchmark/igpu-perf/1024-128_loadlowbit_434.yaml +++ /dev/null @@ -1,13 +0,0 @@ -repo_id: - - 'mistralai/Mistral-7B-Instruct-v0.1' -local_model_hub: 'path to your local model hub' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '1024-128' -test_api: - - "transformer_int4_loadlowbit_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml deleted file mode 100644 index b16e5493017..00000000000 --- a/python/llm/test/benchmark/igpu-perf/2048-256_434.yaml +++ /dev/null @@ -1,13 +0,0 @@ -repo_id: - - 'mistralai/Mistral-7B-Instruct-v0.1' -local_model_hub: 'path to your local model hub' -warm_up: 1 -num_trials: 3 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '2048-256' -test_api: - - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api) diff --git a/python/llm/test/benchmark/igpu-perf/32-32_434.yaml b/python/llm/test/benchmark/igpu-perf/32-32_434.yaml deleted file mode 100644 index 6b5c4229b54..00000000000 --- a/python/llm/test/benchmark/igpu-perf/32-32_434.yaml +++ /dev/null @@ -1,13 +0,0 @@ -repo_id: - - 'mistralai/Mistral-7B-Instruct-v0.1' -local_model_hub: 'path to your local model hub' -warm_up: 3 -num_trials: 5 -num_beams: 1 # default to greedy search -low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) -batch_size: 1 # default to 1 -in_out_pairs: - - '32-32' -test_api: - - "transformer_int4_gpu_win" # on Intel GPU for Windows (catch GPU peak memory) -cpu_embedding: True # whether put embedding to CPU (only avaiable now for gpu win related test_api) From dcd8115c33adb6beb498648d3fa5fc5f9476f266 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 23 May 2024 16:14:47 -0700 Subject: [PATCH 56/57] revert --- .github/workflows/llm_performance_tests.yml | 200 +++++++++--------- .../llm_tests_for_stable_version_on_spr.yml | 6 +- .../inference/test_transformesr_api_434.py | 80 ------- 3 files changed, 103 insertions(+), 183 deletions(-) delete mode 100644 python/llm/test/inference/test_transformesr_api_434.py diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index ef600ca4ece..f7984b800ec 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -13,23 +13,23 @@ on: schedule: - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests - pull_request: - branches: [main] - paths: - - ".github/workflows/llm_performance_tests.yml" - - "python/llm/test/benchmark/**" - - "python/llm/dev/benchmark/all-in-one/**" + # pull_request: + # branches: [main] + # paths: + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - llm-cpp-build: # please uncomment it for PR tests - uses: ./.github/workflows/llm-binary-build.yml + # llm-cpp-build: # please uncomment it for PR tests + # uses: ./.github/workflows/llm-binary-build.yml llm-performance-test-on-arc: - #if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - needs: llm-cpp-build # please uncomment it for PR tests + if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-arc' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -63,23 +63,23 @@ jobs: python -m pip install --upgrade tiktoken # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary - - name: Run LLM install (all) test - uses: ./.github/actions/llm/setup-llm-env - with: - extra-dependency: "xpu_2.1" - - #- name: Install IPEX-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! pip show ipex-llm | grep $test_version_date; then - # echo "Did not install ipex-llm with excepted version $test_version_date" - # exit 1 - # fi + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + # with: + # extra-dependency: "xpu_2.1" + + - name: Install IPEX-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + if ! pip show ipex-llm | grep $test_version_date; then + echo "Did not install ipex-llm with excepted version $test_version_date" + exit 1 + fi - name: Test installed xpu version shell: bash @@ -143,8 +143,8 @@ jobs: fi llm-performance-test-on-spr: - #if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - needs: llm-cpp-build # please uncomment it for PR tests + if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-spr' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -174,21 +174,21 @@ jobs: python -m pip install --upgrade transformers_stream_generator # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary - - - name: Run LLM install (all) test - uses: ./.github/actions/llm/setup-llm-env - - # - name: Install IPEX-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! pip show ipex-llm | grep $test_version_date; then - # echo "Did not install ipex-llm with excepted version $test_version_date" - # exit 1 - # fi + #- name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + #- name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env + + - name: Install IPEX-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + if ! pip show ipex-llm | grep $test_version_date; then + echo "Did not install ipex-llm with excepted version $test_version_date" + exit 1 + fi - name: Test on cpu shell: bash @@ -215,8 +215,8 @@ jobs: done llm-performance-test-on-core: - #if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - needs: llm-cpp-build # please uncomment it for PR tests + if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-core' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -248,21 +248,21 @@ jobs: python -m pip install --upgrade tiktoken einops transformers_stream_generator # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary - - name: Run LLM install (all) test - uses: ./.github/actions/llm/setup-llm-env + #- name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env - # - name: Install IPEX-LLM from Pypi - # shell: bash - # run: | - # pip install --pre --upgrade ipex-llm[all] - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # if ! pip show ipex-llm | grep $test_version_date; then - # echo "Did not install ipex-llm with excepted version $test_version_date" - # exit 1 - # fi + - name: Install IPEX-LLM from Pypi + shell: bash + run: | + pip install --pre --upgrade ipex-llm[all] + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + if ! pip show ipex-llm | grep $test_version_date; then + echo "Did not install ipex-llm with excepted version $test_version_date" + exit 1 + fi - name: Test on core ${{ matrix.platform }} @@ -288,8 +288,8 @@ jobs: fi llm-performance-test-on-igpu: - #if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests - needs: llm-cpp-build # please uncomment it for PR tests + if: ${{ github.event.schedule || github.event_name == 'workflow_dispatch' || github.event.inputs.artifact == 'llm-performance-test-on-igpu' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests + # needs: llm-cpp-build # please uncomment it for PR tests strategy: fail-fast: false matrix: @@ -305,16 +305,44 @@ jobs: # TODO: Put the ipex-llm related install process for win gpu into a action function # Please uncomment it and commment the install from pypi for PR tests - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary + + # - name: Prepare for install ipex-llm from source + # shell: bash + # run: | + # sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + # sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py - - name: Prepare for install ipex-llm from source + # - name: Install ipex-llm and other related packages (install from source) + # shell: cmd + # run: | + # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y + # call conda activate igpu-perf + + # pip install --upgrade pip + # pip install --upgrade wheel + # pip install --upgrade omegaconf pandas + # pip install --upgrade tiktoken einops transformers_stream_generator + + # cd python\llm + # python setup.py clean --all bdist_wheel --win + # if not exist dist\ipex_llm*.whl (exit /b 1) + # for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i + + # pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # pip list + + # call conda deactivate + + - name: Determine desired ipex-llm version shell: bash run: | - sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py + test_version_date=`date -d 'yesterday' '+%Y%m%d'` + echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - - name: Install ipex-llm and other related packages (install from source) + - name: Install ipex-llm and other related packages (install from pypi) shell: cmd run: | call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y @@ -325,44 +353,16 @@ jobs: pip install --upgrade omegaconf pandas pip install --upgrade tiktoken einops transformers_stream_generator - cd python\llm - python setup.py clean --all bdist_wheel --win - if not exist dist\ipex_llm*.whl (exit /b 1) - for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i - - pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - if %ERRORLEVEL% neq 0 (exit /b 1) + pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + pip show ipex-llm | findstr %TEST_VERSION_DATE% + if %ERRORLEVEL% neq 0 ( + echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" + exit /b 1 + ) pip list call conda deactivate - #- name: Determine desired ipex-llm version - # shell: bash - # run: | - # test_version_date=`date -d 'yesterday' '+%Y%m%d'` - # echo "TEST_VERSION_DATE=${test_version_date}" >> "$GITHUB_ENV" - - #- name: Install ipex-llm and other related packages (install from pypi) - # shell: cmd - # run: | - # call conda create -n igpu-perf python=${{ matrix.python-version }} libuv -y - # call conda activate igpu-perf - - # pip install --upgrade pip - # pip install --upgrade wheel - # pip install --upgrade omegaconf pandas - # pip install --upgrade tiktoken einops transformers_stream_generator - - # pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ - # pip show ipex-llm | findstr %TEST_VERSION_DATE% - # if %ERRORLEVEL% neq 0 ( - # echo "Did not install ipex-llm with excepted version %TEST_VERSION_DATE%" - # exit /b 1 - # ) - # pip list - - # call conda deactivate - - name: Create env for html generation shell: cmd run: | diff --git a/.github/workflows/llm_tests_for_stable_version_on_spr.yml b/.github/workflows/llm_tests_for_stable_version_on_spr.yml index ef02ce07524..d852499c57b 100644 --- a/.github/workflows/llm_tests_for_stable_version_on_spr.yml +++ b/.github/workflows/llm_tests_for_stable_version_on_spr.yml @@ -13,9 +13,9 @@ on: # pull_request: # branches: [main] # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: diff --git a/python/llm/test/inference/test_transformesr_api_434.py b/python/llm/test/inference/test_transformesr_api_434.py deleted file mode 100644 index 4de49e660ae..00000000000 --- a/python/llm/test/inference/test_transformesr_api_434.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import pytest -import tempfile -import torch - -from ipex_llm.transformers import AutoModelForCausalLM -from transformers import AutoTokenizer - - -mistral_model_path = os.environ.get('MISTRAL_ORIGIN_PATH') - -prompt = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun" - -@pytest.mark.parametrize("Model, Tokenizer, model_path, prompt", [ - (AutoModelForCausalLM, AutoTokenizer, mistral_model_path, prompt) -]) - -def test_optimize_model(Model, Tokenizer, model_path, prompt): - tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) - input_ids = tokenizer.encode(prompt, return_tensors="pt") - - model = Model.from_pretrained(model_path, - load_in_4bit=True, - optimize_model=False, - trust_remote_code=True) - logits_base_model = (model(input_ids)).logits - - model = Model.from_pretrained(model_path, - load_in_4bit=True, - optimize_model=True, - trust_remote_code=True) - logits_optimized_model = (model(input_ids)).logits - diff = abs(logits_base_model - logits_optimized_model).flatten() - - assert any(diff) is False - -@pytest.mark.parametrize('prompt, answer', [ - ('What is the capital of France?\n\n', 'Paris') - ]) -@pytest.mark.parametrize('Model, Tokenizer, model_path',[ - (AutoModelForCausalLM, AutoTokenizer, mistral_model_path), - ]) -def test_load_low_bit_completion(Model, Tokenizer, model_path, prompt, answer): - tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True) - model = Model.from_pretrained(model_path, - load_in_4bit=True, - optimize_model=True, - trust_remote_code=True) - - with tempfile.TemporaryDirectory() as tempdir: - model.save_low_bit(tempdir) - loaded_model = Model.load_low_bit(tempdir, - optimize_model=True, - trust_remote_code=True) - - with torch.inference_mode(): - input_ids = tokenizer.encode(prompt, return_tensors="pt") - output = loaded_model.generate(input_ids, max_new_tokens=32) - output_str = tokenizer.decode(output[0], skip_special_tokens=True) - - assert answer in output_str - -if __name__ == '__main__': - pytest.main([__file__]) From 936fafe1aae315b3594a49f8053b34b7c93fcfd5 Mon Sep 17 00:00:00 2001 From: jenniew Date: Thu, 23 May 2024 17:10:06 -0700 Subject: [PATCH 57/57] update --- .github/workflows/llm_performance_tests.yml | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index f7984b800ec..73098d4dffa 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -14,11 +14,11 @@ on: - cron: "30 16 * * *" # GMT time, 16:30 GMT == 00:30 China # please uncomment it for PR tests # pull_request: - # branches: [main] - # paths: - # - ".github/workflows/llm_performance_tests.yml" - # - "python/llm/test/benchmark/**" - # - "python/llm/dev/benchmark/all-in-one/**" + # branches: [main] + # paths: + # - ".github/workflows/llm_performance_tests.yml" + # - "python/llm/test/benchmark/**" + # - "python/llm/dev/benchmark/all-in-one/**" workflow_dispatch: workflow_call: @@ -69,7 +69,7 @@ jobs: # - name: Run LLM install (all) test # uses: ./.github/actions/llm/setup-llm-env # with: - # extra-dependency: "xpu_2.1" + # extra-dependency: "xpu_2.1" - name: Install IPEX-LLM from Pypi shell: bash @@ -174,11 +174,11 @@ jobs: python -m pip install --upgrade transformers_stream_generator # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests - #- name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary + # - name: Download llm binary + # uses: ./.github/actions/llm/download-llm-binary - #- name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env - name: Install IPEX-LLM from Pypi shell: bash @@ -249,10 +249,10 @@ jobs: # please uncomment it and comment the "Install IPEX-LLM from Pypi" part for PR tests # - name: Download llm binary - # uses: ./.github/actions/llm/download-llm-binary + # uses: ./.github/actions/llm/download-llm-binary - #- name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env + # - name: Run LLM install (all) test + # uses: ./.github/actions/llm/setup-llm-env - name: Install IPEX-LLM from Pypi shell: bash @@ -332,9 +332,9 @@ jobs: # pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ # if %ERRORLEVEL% neq 0 (exit /b 1) - # pip list + # pip list - # call conda deactivate + # call conda deactivate - name: Determine desired ipex-llm version shell: bash